* [PATCH] kvm tools: Implement virtio network device
@ 2011-04-13 11:48 Asias He
2011-04-13 11:51 ` Pekka Enberg
0 siblings, 1 reply; 8+ messages in thread
From: Asias He @ 2011-04-13 11:48 UTC (permalink / raw)
To: Pekka Enberg, Cyrill Gorcunov, Ingo Molnar; +Cc: kvm, Asias He
This patch implements a virtio network device.
Use '-n virtio or --network=virtio' to enable it.
The current implementation uses tap which needs root privileges to create a
virtual network device (tap0) on host side. Actually, what we need is
CAP_NET_ADMIN.
The host side tap0 is set to 192.168.33.2/24.
You need to configure the guest side eth0 to any ip address in
192.168.33.0/24.
Here are some scp performance tests for different implementations:
None of rx and tx as thread:
guest to host 3.2MB/s
host to guest 3.1MB/s
Only rx as thread:
guest to host 14.7MB/s
host to guest 33.4MB/s
Both rx and tx as thread(This patch works this way):
guest to host 19.8MB/s
host to guest 32.5MB/s
Signed-off-by: Asias He <asias.hejun@gmail.com>
---
tools/kvm/Makefile | 1 +
tools/kvm/include/kvm/ioport.h | 2 +
tools/kvm/include/kvm/types.h | 7 +
tools/kvm/include/kvm/virtio-net.h | 7 +
tools/kvm/kvm-run.c | 11 ++
tools/kvm/virtio-net.c | 318 ++++++++++++++++++++++++++++++++++++
6 files changed, 346 insertions(+), 0 deletions(-)
create mode 100644 tools/kvm/include/kvm/types.h
create mode 100644 tools/kvm/include/kvm/virtio-net.h
create mode 100644 tools/kvm/virtio-net.c
diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 7a2863d..6895113 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -14,6 +14,7 @@ TAGS = ctags
OBJS += 8250-serial.o
OBJS += virtio-blk.o
+OBJS += virtio-net.o
OBJS += virtio-console.o
OBJS += cpuid.o
OBJS += read-write.o
diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 0218329..2fdcca4 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -10,6 +10,8 @@
#define IOPORT_VIRTIO_BLK_SIZE 256
#define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */
#define IOPORT_VIRTIO_CONSOLE_SIZE 256
+#define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */
+#define IOPORT_VIRTIO_NET_SIZE 256
struct kvm;
diff --git a/tools/kvm/include/kvm/types.h b/tools/kvm/include/kvm/types.h
new file mode 100644
index 0000000..0cbc5fb
--- /dev/null
+++ b/tools/kvm/include/kvm/types.h
@@ -0,0 +1,7 @@
+#ifndef KVM_TYPES_H
+#define KVM_TYPES_H
+
+/* FIXME: include/linux/if_tun.h and include/linux/if_ether.h complains */
+#define __be16 u16
+
+#endif /* KVM_TYPES_H */
diff --git a/tools/kvm/include/kvm/virtio-net.h b/tools/kvm/include/kvm/virtio-net.h
new file mode 100644
index 0000000..a1cab15
--- /dev/null
+++ b/tools/kvm/include/kvm/virtio-net.h
@@ -0,0 +1,7 @@
+#ifndef KVM__VIRTIO_NET_H
+#define KVM__VIRTIO_NET_H
+
+struct kvm;
+void virtio_net__init(struct kvm *self);
+
+#endif /* KVM__VIRTIO_NET_H */
diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c
index 65c4787..6046a0a 100644
--- a/tools/kvm/kvm-run.c
+++ b/tools/kvm/kvm-run.c
@@ -16,6 +16,7 @@
#include <kvm/kvm-cpu.h>
#include <kvm/8250-serial.h>
#include <kvm/virtio-blk.h>
+#include <kvm/virtio-net.h>
#include <kvm/virtio-console.h>
#include <kvm/disk-image.h>
#include <kvm/util.h>
@@ -29,6 +30,7 @@
#define DEFAULT_KVM_DEV "/dev/kvm"
#define DEFAULT_CONSOLE "serial"
+#define DEFAULT_NETWORK "none"
#define MB_SHIFT (20)
#define MIN_RAM_SIZE_MB (64ULL)
@@ -63,6 +65,7 @@ static const char *initrd_filename;
static const char *image_filename;
static const char *console;
static const char *kvm_dev;
+static const char *network;
static bool single_step;
static bool readonly_image;
extern bool ioport_debug;
@@ -84,6 +87,8 @@ static const struct option options[] = {
"Don't write changes back to disk image"),
OPT_STRING('c', "console", &console, "serial or virtio",
"Console to use"),
+ OPT_STRING('n', "network", &network, "virtio",
+ "Network to use"),
OPT_GROUP("Kernel options:"),
OPT_STRING('k', "kernel", &kernel_filename, "kernel",
@@ -250,6 +255,12 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
virtio_console__init(kvm);
+ if (!network)
+ network = DEFAULT_NETWORK;
+
+ if (!strncmp(network, "virtio", 6))
+ virtio_net__init(kvm);
+
kvm__start_timer(kvm);
for (i = 0; i < nrcpus; i++) {
diff --git a/tools/kvm/virtio-net.c b/tools/kvm/virtio-net.c
new file mode 100644
index 0000000..ec70d5c
--- /dev/null
+++ b/tools/kvm/virtio-net.c
@@ -0,0 +1,318 @@
+#include "kvm/virtio-net.h"
+#include "kvm/virtio-pci.h"
+#include "kvm/virtio.h"
+#include "kvm/ioport.h"
+#include "kvm/types.h"
+#include "kvm/mutex.h"
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+#include "kvm/pci.h"
+
+#include <linux/virtio_net.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <fcntl.h>
+
+#define VIRTIO_NET_IRQ 14
+#define VIRTIO_NET_QUEUE_SIZE 128
+#define VIRTIO_NET_NUM_QUEUES 2
+#define VIRTIO_NET_RX_QUEUE 0
+#define VIRTIO_NET_TX_QUEUE 1
+#define PCI_VIRTIO_NET_DEVNUM 3
+
+/*
+ * Per-device state for the (single) emulated virtio-net device.
+ * Shared between the vcpu I/O handlers and the rx/tx worker threads.
+ */
+struct net_device {
+ /* Serializes access to the virtio-pci register state below. */
+ pthread_mutex_t mutex;
+
+ /* vqs[0] is the receive queue, vqs[1] the transmit queue. */
+ struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES];
+ struct virtio_net_config net_config;
+ uint32_t host_features;
+ uint32_t guest_features;
+ uint16_t config_vector;
+ uint8_t status;
+ /* Queue index last written to VIRTIO_PCI_QUEUE_SEL by the guest. */
+ uint16_t queue_selector;
+
+ /* RX worker thread plus the condvar used to wake it on queue notify. */
+ pthread_t io_rx_thread;
+ pthread_mutex_t io_rx_mutex;
+ pthread_cond_t io_rx_cond;
+
+ /* TX worker thread plus the condvar used to wake it on queue notify. */
+ pthread_t io_tx_thread;
+ pthread_mutex_t io_tx_mutex;
+ pthread_cond_t io_tx_cond;
+
+ /* Host-side tap device carrying the guest's frames. */
+ int tap_fd;
+ char tap_name[IFNAMSIZ];
+};
+
+/* Single global device instance: hard-coded MAC, link reported as up. */
+static struct net_device net_device = {
+ .mutex = PTHREAD_MUTEX_INITIALIZER,
+
+ .net_config = {
+  .mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
+  .status = VIRTIO_NET_S_LINK_UP,
+ },
+
+ /* Only VIRTIO_NET_F_MAC is advertised: no CSUM/GSO offloads. */
+ .host_features = 1UL << VIRTIO_NET_F_MAC,
+};
+
+/*
+ * RX worker: sleeps until the guest posts receive buffers, then reads
+ * frames from the tap fd into them and raises the guest IRQ per frame.
+ * 'p' is the struct kvm pointer passed at pthread_create() time.
+ */
+static void *virtio_net_rx_thread(void *p)
+{
+ struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
+ struct virt_queue *vq;
+ struct kvm *self;
+ uint16_t out, in;
+ uint16_t head;
+ int len;
+
+ self = p;
+ vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE];
+
+ while (1) {
+  /* Block until virtio_net_handle_callback() signals a queue notify. */
+  mutex_lock(&net_device.io_rx_mutex);
+  if (!virt_queue__available(vq))
+   pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex);
+  mutex_unlock(&net_device.io_rx_mutex);
+
+  while (virt_queue__available(vq)) {
+   head = virt_queue__get_iov(vq, iov, &out, &in, self);
+
+   /* No GSO/CSUM features are negotiated, so the virtio_net_hdr is
+    * never filled in; skip iov[0], which is assumed to cover exactly
+    * the header descriptor (NOTE(review): confirm guest buffer layout). */
+   /* NOTE(review): readv() return value is unchecked; on error len is -1
+    * and a bogus used length is reported to the guest. */
+   len = readv(net_device.tap_fd, iov + 1, in - 1);
+
+   /* The used length must still account for the (unwritten) header. */
+   virt_queue__set_used_elem(vq, head, sizeof(struct virtio_net_hdr) + len);
+
+   /* Interrupt the guest per frame, otherwise rx latency is huge. */
+   kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
+  }
+
+ }
+
+ /* Not reached: the loop above never exits. */
+ pthread_exit(NULL);
+ return NULL;
+
+}
+
+/*
+ * TX worker: sleeps until the guest queues outgoing frames, writes them
+ * to the tap fd, then raises the guest IRQ once per drained batch
+ * (unlike rx, which interrupts per frame).
+ * 'p' is the struct kvm pointer passed at pthread_create() time.
+ */
+static void *virtio_net_tx_thread(void *p)
+{
+ struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
+ struct virt_queue *vq;
+ struct kvm *self;
+ uint16_t out, in;
+ uint16_t head;
+ int len;
+
+ self = p;
+ vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE];
+
+ while (1) {
+  /* Block until virtio_net_handle_callback() signals a queue notify. */
+  mutex_lock(&net_device.io_tx_mutex);
+  if (!virt_queue__available(vq))
+   pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex);
+  mutex_unlock(&net_device.io_tx_mutex);
+
+  while (virt_queue__available(vq)) {
+   head = virt_queue__get_iov(vq, iov, &out, &in, self);
+   /* iov[0] is assumed to be the virtio_net_hdr descriptor; it is
+    * skipped since no offload features are negotiated.
+    * NOTE(review): writev() return value is unchecked; len may be -1. */
+   len = writev(net_device.tap_fd, iov + 1, out - 1);
+   virt_queue__set_used_elem(vq, head, len);
+  }
+
+  kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
+ }
+
+ /* Not reached: the loop above never exits. */
+ pthread_exit(NULL);
+ return NULL;
+
+}
+/*
+ * Handle a guest read from the device-specific config space
+ * (struct virtio_net_config: MAC address and link status).
+ * Only single-byte, single-count accesses are emulated.
+ * Returns false for access shapes or offsets we do not handle.
+ */
+static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count)
+{
+ uint8_t *config_space = (uint8_t *) &net_device.net_config;
+
+ if (size != 1 || count != 1)
+  return false;
+
+ /* Valid indexes are 0 .. sizeof(config) - 1, so '>=' (the original '>'
+  * let offset == sizeof through, reading one byte past the struct).
+  * Also fail the access instead of falling through to the OOB read. */
+ if ((offset - VIRTIO_PCI_CONFIG_NOMSI) >= sizeof(struct virtio_net_config)) {
+  error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI);
+  return false;
+ }
+
+ ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]);
+
+ return true;
+}
+
+/*
+ * Handle guest 'in' accesses on the device's PCI I/O BAR.
+ * Offsets below VIRTIO_PCI_CONFIG_NOMSI are the common virtio-pci
+ * registers; anything above is the device-specific config space.
+ * Returns false for registers that are write-only or unimplemented.
+ */
+static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
+{
+ unsigned long offset = port - IOPORT_VIRTIO_NET;
+ bool ret = true;
+
+ mutex_lock(&net_device.mutex);
+
+ switch (offset) {
+ case VIRTIO_PCI_HOST_FEATURES:
+  ioport__write32(data, net_device.host_features);
+  break;
+ case VIRTIO_PCI_GUEST_FEATURES:
+  /* Guest features register is write-only from the guest side. */
+  ret = false;
+  break;
+ case VIRTIO_PCI_QUEUE_PFN:
+  ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn);
+  break;
+ case VIRTIO_PCI_QUEUE_NUM:
+  ioport__write16(data, VIRTIO_NET_QUEUE_SIZE);
+  break;
+ case VIRTIO_PCI_QUEUE_SEL:
+ case VIRTIO_PCI_QUEUE_NOTIFY:
+  /* Write-only registers. */
+  ret = false;
+  break;
+ case VIRTIO_PCI_STATUS:
+  ioport__write8(data, net_device.status);
+  break;
+ case VIRTIO_PCI_ISR:
+  /* Reading ISR acknowledges the interrupt: report bit 0 set and
+   * deassert the IRQ line. */
+  ioport__write8(data, 0x1);
+  kvm__irq_line(self, VIRTIO_NET_IRQ, 0);
+  break;
+ case VIRTIO_MSI_CONFIG_VECTOR:
+  ioport__write16(data, net_device.config_vector);
+  break;
+ default:
+  /* Device-specific config space (MAC, link status). */
+  ret = virtio_net_pci_io_device_specific_in(data, offset, size, count);
+ };
+
+ mutex_unlock(&net_device.mutex);
+
+ return ret;
+}
+
+/*
+ * Queue-notify dispatch: wake the worker thread that services the
+ * notified queue. Unknown queue indexes are silently ignored.
+ */
+static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index)
+{
+ switch (queue_index) {
+ case VIRTIO_NET_TX_QUEUE:
+  mutex_lock(&net_device.io_tx_mutex);
+  pthread_cond_signal(&net_device.io_tx_cond);
+  mutex_unlock(&net_device.io_tx_mutex);
+  break;
+ case VIRTIO_NET_RX_QUEUE:
+  mutex_lock(&net_device.io_rx_mutex);
+  pthread_cond_signal(&net_device.io_rx_cond);
+  mutex_unlock(&net_device.io_rx_mutex);
+  break;
+ default:
+  break;
+ }
+}
+
+/*
+ * Handle guest 'out' accesses on the device's PCI I/O BAR: feature
+ * negotiation, queue setup, queue notification and status updates.
+ * Returns false for offsets that are not writable registers.
+ */
+static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
+{
+ unsigned long offset = port - IOPORT_VIRTIO_NET;
+ bool ret = true;
+
+ mutex_lock(&net_device.mutex);
+
+ switch (offset) {
+ case VIRTIO_PCI_GUEST_FEATURES:
+  net_device.guest_features = ioport__read32(data);
+  break;
+ case VIRTIO_PCI_QUEUE_PFN: {
+  struct virt_queue *queue;
+  void *p;
+
+  assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES);
+
+  /* The guest hands us the page frame number of the vring; map it
+   * into host address space (pfn << 12 == guest physical address,
+   * 4096-byte vring alignment per the legacy virtio-pci ABI). */
+  queue = &net_device.vqs[net_device.queue_selector];
+  queue->pfn = ioport__read32(data);
+  p = guest_flat_to_host(self, queue->pfn << 12);
+
+  vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096);
+
+  break;
+ }
+ case VIRTIO_PCI_QUEUE_SEL:
+  net_device.queue_selector = ioport__read16(data);
+  break;
+ case VIRTIO_PCI_QUEUE_NOTIFY: {
+  uint16_t queue_index;
+  queue_index = ioport__read16(data);
+  /* Wake the rx/tx worker servicing the notified queue. */
+  virtio_net_handle_callback(self, queue_index);
+  break;
+ }
+ case VIRTIO_PCI_STATUS:
+  net_device.status = ioport__read8(data);
+  break;
+ case VIRTIO_MSI_CONFIG_VECTOR:
+  /* NOTE(review): the value written by the guest is discarded and
+   * NO_VECTOR stored unconditionally — presumably because MSI is
+   * unsupported; confirm intent. */
+  net_device.config_vector = VIRTIO_MSI_NO_VECTOR;
+  break;
+ case VIRTIO_MSI_QUEUE_VECTOR:
+  /* MSI per-queue vectors unsupported: accept and ignore. */
+  break;
+ default:
+  ret = false;
+ };
+
+ mutex_unlock(&net_device.mutex);
+ return ret;
+}
+
+/* I/O port callbacks registered for the device's PIO BAR range. */
+static struct ioport_operations virtio_net_io_ops = {
+ .io_in = virtio_net_pci_io_in,
+ .io_out = virtio_net_pci_io_out,
+};
+
+/* Red Hat/Qumranet vendor ID and the virtio-net device/subsystem IDs. */
+#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4
+#define PCI_DEVICE_ID_VIRTIO_NET 0x1000
+#define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4
+#define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001
+
+/* PCI config-space header presented to the guest for this device. */
+static struct pci_device_header virtio_net_pci_device = {
+ .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET,
+ .device_id = PCI_DEVICE_ID_VIRTIO_NET,
+ .header_type = PCI_HEADER_TYPE_NORMAL,
+ .revision_id = 0,
+ /* 0x020000: network / Ethernet controller class code. */
+ .class = 0x020000,
+ .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
+ .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET,
+ /* BAR 0 is an I/O-space BAR at the fixed virtio-net port base. */
+ .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO,
+ .irq_pin = 3,
+ .irq_line = VIRTIO_NET_IRQ,
+};
+
+/*
+ * Open /dev/net/tun and create the host-side tap interface backing the
+ * guest NIC, then assign it a fixed IP address. Dies on fatal errors.
+ * Requires CAP_NET_ADMIN (typically root).
+ */
+static void virtio_net__tap_init(void)
+{
+ struct ifreq ifr;
+
+ net_device.tap_fd = open("/dev/net/tun", O_RDWR);
+ if (net_device.tap_fd < 0)
+  die("Unable to open /dev/net/tun\n");
+
+ memset(&ifr, 0, sizeof(ifr));
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+ if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0)
+  die("Config tap device error. Are you root?");
+
+ /* strncpy() does not guarantee NUL-termination when the source fills
+  * the buffer; terminate explicitly. */
+ strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name) - 1);
+ net_device.tap_name[sizeof(net_device.tap_name) - 1] = '\0';
+
+ ioctl(net_device.tap_fd, TUNSETNOCSUM, 1);
+
+ /*FIXME: Remove this after user can specify ip address and netmask*/
+ /* system() returns the command's exit status; '< 0' only caught
+  * fork/exec failure and missed ifconfig itself failing.
+  * NOTE(review): "tap0" is hard-coded here while the kernel chose the
+  * name in ifr.ifr_name — presumably always tap0; confirm. */
+ if (system("ifconfig tap0 192.168.33.2") != 0)
+  warning("Can not set ip address on tap0");
+}
+
+/*
+ * Initialize the rx/tx synchronization primitives and start both
+ * worker threads, passing them the kvm instance.
+ *
+ * Fixes a copy-paste bug: the original initialized io_rx_mutex and
+ * io_tx_cond twice, leaving io_tx_mutex and io_rx_cond uninitialized.
+ */
+static void virtio_net__io_thread_init(struct kvm *self)
+{
+ pthread_mutex_init(&net_device.io_rx_mutex, NULL);
+ pthread_cond_init(&net_device.io_rx_cond, NULL);
+
+ pthread_mutex_init(&net_device.io_tx_mutex, NULL);
+ pthread_cond_init(&net_device.io_tx_cond, NULL);
+
+ pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self);
+ pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self);
+}
+
+/*
+ * Public entry point: register the virtio-net PCI device and its I/O
+ * port range, create the host tap interface, and start the rx/tx
+ * worker threads.
+ */
+void virtio_net__init(struct kvm *self)
+{
+ pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM);
+ ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE);
+
+ virtio_net__tap_init();
+ virtio_net__io_thread_init(self);
+}
--
1.7.4.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 11:48 [PATCH] kvm tools: Implement virtio network device Asias He
@ 2011-04-13 11:51 ` Pekka Enberg
2011-04-13 12:00 ` Asias He
0 siblings, 1 reply; 8+ messages in thread
From: Pekka Enberg @ 2011-04-13 11:51 UTC (permalink / raw)
To: Asias He; +Cc: Cyrill Gorcunov, Ingo Molnar, kvm
On 4/13/11 2:48 PM, Asias He wrote:
> This patch implement virtio network device.
> Use '-n virtio or --network=virtio' to enable it.
>
> The current implementation uses tap which needs root privileges to create a
> virtual network device (tap0) on host side. Actually, what we need is
> CAP_NET_ADMIN.
>
> The host side tap0 is set to 192.168.33.2/24.
> You need to configure the guest side eth0 to any ip address in
> 192.168.33.0/24.
>
> Here are some scp performance test for differenct implementations:
> None of rx and tx as thread:
> guest to host 3.2MB/s
> host to guest 3.1MB/s
>
> Only rx as thread:
> guest to host 14.7MB/s
> host to guest 33.4MB/s
>
> Both rx and tx as thread(This patch works this way):
> guest to host 19.8MB/s
> host to guest 32.5MB/s
>
> Signed-off-by: Asias He<asias.hejun@gmail.com>
This is already in master. Thanks!
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 11:51 ` Pekka Enberg
@ 2011-04-13 12:00 ` Asias He
2011-04-13 13:02 ` [transparent networking] " Ingo Molnar
0 siblings, 1 reply; 8+ messages in thread
From: Asias He @ 2011-04-13 12:00 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Cyrill Gorcunov, Ingo Molnar, kvm
On 04/13/2011 07:51 PM, Pekka Enberg wrote:
> On 4/13/11 2:48 PM, Asias He wrote:
>> This patch implement virtio network device.
>> Use '-n virtio or --network=virtio' to enable it.
>>
>> The current implementation uses tap which needs root privileges to
>> create a
>> virtual network device (tap0) on host side. Actually, what we need is
>> CAP_NET_ADMIN.
>>
>> The host side tap0 is set to 192.168.33.2/24.
>> You need to configure the guest side eth0 to any ip address in
>> 192.168.33.0/24.
>>
>> Here are some scp performance test for differenct implementations:
>> None of rx and tx as thread:
>> guest to host 3.2MB/s
>> host to guest 3.1MB/s
>>
>> Only rx as thread:
>> guest to host 14.7MB/s
>> host to guest 33.4MB/s
>>
>> Both rx and tx as thread(This patch works this way):
>> guest to host 19.8MB/s
>> host to guest 32.5MB/s
>>
>> Signed-off-by: Asias He<asias.hejun@gmail.com>
>
> This is already in master. Thanks!
>
Ingo suggested to CC the updated version of this patch to kvm list. So I
am posting this patch again.
--
Best Regards,
Asias He
^ permalink raw reply [flat|nested] 8+ messages in thread
* [transparent networking] Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 12:00 ` Asias He
@ 2011-04-13 13:02 ` Ingo Molnar
2011-04-13 13:33 ` Avi Kivity
2011-04-13 16:21 ` Stefan Hajnoczi
0 siblings, 2 replies; 8+ messages in thread
From: Ingo Molnar @ 2011-04-13 13:02 UTC (permalink / raw)
To: Asias He; +Cc: Pekka Enberg, Cyrill Gorcunov, kvm
* Asias He <asias.hejun@gmail.com> wrote:
> >> Here are some scp performance test for differenct implementations:
> >> None of rx and tx as thread:
> >> guest to host 3.2MB/s
> >> host to guest 3.1MB/s
> >>
> >> Only rx as thread:
> >> guest to host 14.7MB/s
> >> host to guest 33.4MB/s
> >>
> >> Both rx and tx as thread(This patch works this way):
> >> guest to host 19.8MB/s
> >> host to guest 32.5MB/s
> >>
> >> Signed-off-by: Asias He<asias.hejun@gmail.com>
> >
> > This is already in master. Thanks!
> >
>
> Ingo suggested to CC the updated version of this patch to kvm list. So I
> am posting this patch again.
Thanks Asias, cool stuff.
Maybe other KVM developers want to chime in about how to best implement
transparent (non-TAP-using) guest-side networking.
The best approach would be to not go down as low as the IP/Ethernet packeting
level (it's unnecessary protocol overhead), but to implement some sort of
streaming, virtio based TCP connection proxying support.
Strictly talking the guest does not need ICMP packets to have working Internet
connectivity - only passing/tunneling through TCP sockets would be enough. The
following highlevel ops are needed:
- connect/shutdown/close
- send/receive
- poll
And would be passed through to the host side and mirrored there into real
connect/shutdown TCP socket ops and into send/receive ops.
The guest OS does not need to be 'aware' of this in any way, as long as the
bzImage has this magic guest tunneling support included.
Obviously, such a highlevel approach would be much faster as well than any
packet level virtual networking approach.
Does something like this exist upstream, or do we have to implement it?
Thanks,
Ingo
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [transparent networking] Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 13:02 ` [transparent networking] " Ingo Molnar
@ 2011-04-13 13:33 ` Avi Kivity
2011-04-13 13:38 ` Avi Kivity
2011-04-13 13:39 ` Asias He
2011-04-13 16:21 ` Stefan Hajnoczi
1 sibling, 2 replies; 8+ messages in thread
From: Avi Kivity @ 2011-04-13 13:33 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Asias He, Pekka Enberg, Cyrill Gorcunov, kvm
On 04/13/2011 04:02 PM, Ingo Molnar wrote:
> * Asias He<asias.hejun@gmail.com> wrote:
>
> > >> Here are some scp performance test for differenct implementations:
> > >> None of rx and tx as thread:
> > >> guest to host 3.2MB/s
> > >> host to guest 3.1MB/s
> > >>
> > >> Only rx as thread:
> > >> guest to host 14.7MB/s
> > >> host to guest 33.4MB/s
> > >>
> > >> Both rx and tx as thread(This patch works this way):
> > >> guest to host 19.8MB/s
> > >> host to guest 32.5MB/s
> > >>
> > >> Signed-off-by: Asias He<asias.hejun@gmail.com>
> > >
> > > This is already in master. Thanks!
> > >
> >
> > Ingo suggested to CC the updated version of this patch to kvm list. So I
> > am posting this patch again.
>
> Thanks Asias, cool stuff.
>
> Maybe other KVM developers want to chime in about how to best implement
> transparent (non-TAP-using) guest-side networking.
>
> The best approach would be to not go down as low as the IP/Ethernet packeting
> level (it's unnecessary protocol overhead), but to implement some sort of
> streaming, virtio based TCP connection proxying support.
>
> Strictly talking the guest does not need ICMP packets to have working Internet
> connectivity - only passing/tunneling through TCP sockets would be enough. The
> following highlevel ops are needed:
>
> - connect/shutdown/close
> - send/receive
> - poll
>
> And would be passed through to the host side and mirrored there into real
> connect/shutdown TCP socket ops and into send/receive ops.
>
> The guest OS does not need to be 'aware' of this in any way, as long as the
> bzImage has this magic guest tunneling support included.
>
> Obviously, such a highlevel approach would be much faster as well than any
> packet level virtual networking approach.
>
> Does something like this exist upstream, or do we have to implement it?
>
macvtap does non-privileged setupless networking.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [transparent networking] Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 13:33 ` Avi Kivity
@ 2011-04-13 13:38 ` Avi Kivity
2011-04-13 13:39 ` Asias He
1 sibling, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2011-04-13 13:38 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Asias He, Pekka Enberg, Cyrill Gorcunov, kvm
On 04/13/2011 04:33 PM, Avi Kivity wrote:
> On 04/13/2011 04:02 PM, Ingo Molnar wrote:
>> * Asias He<asias.hejun@gmail.com> wrote:
>>
>> > >> Here are some scp performance test for differenct implementations:
>> > >> None of rx and tx as thread:
>> > >> guest to host 3.2MB/s
>> > >> host to guest 3.1MB/s
>> > >>
>> > >> Only rx as thread:
>> > >> guest to host 14.7MB/s
>> > >> host to guest 33.4MB/s
>> > >>
>> > >> Both rx and tx as thread(This patch works this way):
>> > >> guest to host 19.8MB/s
>> > >> host to guest 32.5MB/s
>> > >>
>> > >> Signed-off-by: Asias He<asias.hejun@gmail.com>
>> > >
>> > > This is already in master. Thanks!
>> > >
>> >
>> > Ingo suggested to CC the updated version of this patch to kvm
>> list. So I
>> > am posting this patch again.
>>
>> Thanks Asias, cool stuff.
>>
>> Maybe other KVM developers want to chime in about how to best implement
>> transparent (non-TAP-using) guest-side networking.
>>
>> The best approach would be to not go down as low as the IP/Ethernet
>> packeting
>> level (it's unnecessary protocol overhead), but to implement some
>> sort of
>> streaming, virtio based TCP connection proxying support.
>>
>> Strictly talking the guest does not need ICMP packets to have working
>> Internet
>> connectivity - only passing/tunneling through TCP sockets would be
>> enough. The
>> following highlevel ops are needed:
>>
>> - connect/shutdown/close
>> - send/receive
>> - poll
>>
>> And would be passed through to the host side and mirrored there into
>> real
>> connect/shutdown TCP socket ops and into send/receive ops.
>>
>> The guest OS does not need to be 'aware' of this in any way, as long
>> as the
>> bzImage has this magic guest tunneling support included.
>>
>> Obviously, such a highlevel approach would be much faster as well
>> than any
>> packet level virtual networking approach.
>>
>> Does something like this exist upstream, or do we have to implement it?
>>
>
> macvtap does non-privileged setupless networking.
>
But this doesn't really answer your message. No, there is no tcp-level
virtio device. However, because of GSO/GRO, I don't think there is a
huge win to be gained by bypassing the lower layers. If you want to
send a megabyte's worth of data into a tcp stream, you prepend a header
and post it to virtio-net, and this goes all the way down to the real
device.
I'm not sure tcp-offload like you propose would pass netdev@. Similar
approaches for real hardware were rejected since they would bypass the
tcp stack. Things like netfilter would no longer work.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [transparent networking] Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 13:33 ` Avi Kivity
2011-04-13 13:38 ` Avi Kivity
@ 2011-04-13 13:39 ` Asias He
1 sibling, 0 replies; 8+ messages in thread
From: Asias He @ 2011-04-13 13:39 UTC (permalink / raw)
To: Avi Kivity; +Cc: Ingo Molnar, Pekka Enberg, Cyrill Gorcunov, kvm
On 04/13/2011 09:33 PM, Avi Kivity wrote:
> On 04/13/2011 04:02 PM, Ingo Molnar wrote:
>> * Asias He<asias.hejun@gmail.com> wrote:
>>
>> > >> Here are some scp performance test for differenct implementations:
>> > >> None of rx and tx as thread:
>> > >> guest to host 3.2MB/s
>> > >> host to guest 3.1MB/s
>> > >>
>> > >> Only rx as thread:
>> > >> guest to host 14.7MB/s
>> > >> host to guest 33.4MB/s
>> > >>
>> > >> Both rx and tx as thread(This patch works this way):
>> > >> guest to host 19.8MB/s
>> > >> host to guest 32.5MB/s
>> > >>
>> > >> Signed-off-by: Asias He<asias.hejun@gmail.com>
>> > >
>> > > This is already in master. Thanks!
>> > >
>> >
>> > Ingo suggested to CC the updated version of this patch to kvm list.
>> So I
>> > am posting this patch again.
>>
>> Thanks Asias, cool stuff.
>>
>> Maybe other KVM developers want to chime in about how to best implement
>> transparent (non-TAP-using) guest-side networking.
>>
>> The best approach would be to not go down as low as the IP/Ethernet
>> packeting
>> level (it's unnecessary protocol overhead), but to implement some sort of
>> streaming, virtio based TCP connection proxying support.
>>
>> Strictly talking the guest does not need ICMP packets to have working
>> Internet
>> connectivity - only passing/tunneling through TCP sockets would be
>> enough. The
>> following highlevel ops are needed:
>>
>> - connect/shutdown/close
>> - send/receive
>> - poll
>>
>> And would be passed through to the host side and mirrored there into real
>> connect/shutdown TCP socket ops and into send/receive ops.
>>
>> The guest OS does not need to be 'aware' of this in any way, as long
>> as the
>> bzImage has this magic guest tunneling support included.
>>
>> Obviously, such a highlevel approach would be much faster as well than
>> any
>> packet level virtual networking approach.
>>
>> Does something like this exist upstream, or do we have to implement it?
>>
>
> macvtap does non-privileged setupless networking.
Great! Thanks Avi!
--
Best Regards,
Asias He
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [transparent networking] Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 13:02 ` [transparent networking] " Ingo Molnar
2011-04-13 13:33 ` Avi Kivity
@ 2011-04-13 16:21 ` Stefan Hajnoczi
1 sibling, 0 replies; 8+ messages in thread
From: Stefan Hajnoczi @ 2011-04-13 16:21 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Asias He, Pekka Enberg, Cyrill Gorcunov, kvm
On Wed, Apr 13, 2011 at 2:02 PM, Ingo Molnar <mingo@elte.hu> wrote:
> Strictly talking the guest does not need ICMP packets to have working Internet
> connectivity - only passing/tunneling through TCP sockets would be enough.
Don't forget UDP for DNS.
Stefan
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2011-04-13 16:21 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-04-13 11:48 [PATCH] kvm tools: Implement virtio network device Asias He
2011-04-13 11:51 ` Pekka Enberg
2011-04-13 12:00 ` Asias He
2011-04-13 13:02 ` [transparent networking] " Ingo Molnar
2011-04-13 13:33 ` Avi Kivity
2011-04-13 13:38 ` Avi Kivity
2011-04-13 13:39 ` Asias He
2011-04-13 16:21 ` Stefan Hajnoczi
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox