* [PATCH] kvm tools: Implement virtio network device
@ 2011-04-13 11:48 Asias He
2011-04-13 11:51 ` Pekka Enberg
0 siblings, 1 reply; 8+ messages in thread
From: Asias He @ 2011-04-13 11:48 UTC (permalink / raw)
To: Pekka Enberg, Cyrill Gorcunov, Ingo Molnar; +Cc: kvm, Asias He
This patch implements a virtio network device.
Use '-n virtio or --network=virtio' to enable it.
The current implementation uses tap which needs root privileges to create a
virtual network device (tap0) on host side. Actually, what we need is
CAP_NET_ADMIN.
The host side tap0 is set to 192.168.33.2/24.
You need to configure the guest side eth0 to any ip address in
192.168.33.0/24.
Here are some scp performance tests for different implementations:
None of rx and tx as thread:
guest to host 3.2MB/s
host to guest 3.1MB/s
Only rx as thread:
guest to host 14.7MB/s
host to guest 33.4MB/s
Both rx and tx as thread(This patch works this way):
guest to host 19.8MB/s
host to guest 32.5MB/s
Signed-off-by: Asias He <asias.hejun@gmail.com>
---
tools/kvm/Makefile | 1 +
tools/kvm/include/kvm/ioport.h | 2 +
tools/kvm/include/kvm/types.h | 7 +
tools/kvm/include/kvm/virtio-net.h | 7 +
tools/kvm/kvm-run.c | 11 ++
tools/kvm/virtio-net.c | 318 ++++++++++++++++++++++++++++++++++++
6 files changed, 346 insertions(+), 0 deletions(-)
create mode 100644 tools/kvm/include/kvm/types.h
create mode 100644 tools/kvm/include/kvm/virtio-net.h
create mode 100644 tools/kvm/virtio-net.c
diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 7a2863d..6895113 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -14,6 +14,7 @@ TAGS = ctags
OBJS += 8250-serial.o
OBJS += virtio-blk.o
+OBJS += virtio-net.o
OBJS += virtio-console.o
OBJS += cpuid.o
OBJS += read-write.o
diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h
index 0218329..2fdcca4 100644
--- a/tools/kvm/include/kvm/ioport.h
+++ b/tools/kvm/include/kvm/ioport.h
@@ -10,6 +10,8 @@
#define IOPORT_VIRTIO_BLK_SIZE 256
#define IOPORT_VIRTIO_CONSOLE 0xd200 /* Virtio console device */
#define IOPORT_VIRTIO_CONSOLE_SIZE 256
+#define IOPORT_VIRTIO_NET 0xe200 /* Virtio network device */
+#define IOPORT_VIRTIO_NET_SIZE 256
struct kvm;
diff --git a/tools/kvm/include/kvm/types.h b/tools/kvm/include/kvm/types.h
new file mode 100644
index 0000000..0cbc5fb
--- /dev/null
+++ b/tools/kvm/include/kvm/types.h
@@ -0,0 +1,7 @@
+#ifndef KVM_TYPES_H
+#define KVM_TYPES_H
+
+/* FIXME: include/linux/if_tun.h and include/linux/if_ether.h complains */
+#define __be16 u16
+
+#endif /* KVM_TYPES_H */
diff --git a/tools/kvm/include/kvm/virtio-net.h b/tools/kvm/include/kvm/virtio-net.h
new file mode 100644
index 0000000..a1cab15
--- /dev/null
+++ b/tools/kvm/include/kvm/virtio-net.h
@@ -0,0 +1,7 @@
+#ifndef KVM__VIRTIO_NET_H
+#define KVM__VIRTIO_NET_H
+
+struct kvm;
+void virtio_net__init(struct kvm *self);
+
+#endif /* KVM__VIRTIO_NET_H */
diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c
index 65c4787..6046a0a 100644
--- a/tools/kvm/kvm-run.c
+++ b/tools/kvm/kvm-run.c
@@ -16,6 +16,7 @@
#include <kvm/kvm-cpu.h>
#include <kvm/8250-serial.h>
#include <kvm/virtio-blk.h>
+#include <kvm/virtio-net.h>
#include <kvm/virtio-console.h>
#include <kvm/disk-image.h>
#include <kvm/util.h>
@@ -29,6 +30,7 @@
#define DEFAULT_KVM_DEV "/dev/kvm"
#define DEFAULT_CONSOLE "serial"
+#define DEFAULT_NETWORK "none"
#define MB_SHIFT (20)
#define MIN_RAM_SIZE_MB (64ULL)
@@ -63,6 +65,7 @@ static const char *initrd_filename;
static const char *image_filename;
static const char *console;
static const char *kvm_dev;
+static const char *network;
static bool single_step;
static bool readonly_image;
extern bool ioport_debug;
@@ -84,6 +87,8 @@ static const struct option options[] = {
"Don't write changes back to disk image"),
OPT_STRING('c', "console", &console, "serial or virtio",
"Console to use"),
+ OPT_STRING('n', "network", &network, "virtio",
+ "Network to use"),
OPT_GROUP("Kernel options:"),
OPT_STRING('k', "kernel", &kernel_filename, "kernel",
@@ -250,6 +255,12 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
virtio_console__init(kvm);
+ if (!network)
+ network = DEFAULT_NETWORK;
+
+ if (!strncmp(network, "virtio", 6))
+ virtio_net__init(kvm);
+
kvm__start_timer(kvm);
for (i = 0; i < nrcpus; i++) {
diff --git a/tools/kvm/virtio-net.c b/tools/kvm/virtio-net.c
new file mode 100644
index 0000000..ec70d5c
--- /dev/null
+++ b/tools/kvm/virtio-net.c
@@ -0,0 +1,318 @@
+#include "kvm/virtio-net.h"
+#include "kvm/virtio-pci.h"
+#include "kvm/virtio.h"
+#include "kvm/ioport.h"
+#include "kvm/types.h"
+#include "kvm/mutex.h"
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+#include "kvm/pci.h"
+
+#include <linux/virtio_net.h>
+#include <linux/if_tun.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <fcntl.h>
+
+#define VIRTIO_NET_IRQ 14
+#define VIRTIO_NET_QUEUE_SIZE 128
+#define VIRTIO_NET_NUM_QUEUES 2
+#define VIRTIO_NET_RX_QUEUE 0
+#define VIRTIO_NET_TX_QUEUE 1
+#define PCI_VIRTIO_NET_DEVNUM 3
+
+/*
+ * Per-device state for the (single) emulated virtio-net device.
+ * Shared between the vcpu I/O handlers and the rx/tx worker threads.
+ */
+struct net_device {
+ /* Serializes access to the virtio-pci register state below. */
+ pthread_mutex_t mutex;
+
+ /* vqs[0] is the receive queue, vqs[1] the transmit queue. */
+ struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES];
+ struct virtio_net_config net_config;
+ uint32_t host_features;
+ uint32_t guest_features;
+ uint16_t config_vector;
+ uint8_t status;
+ /* Queue index last written to VIRTIO_PCI_QUEUE_SEL by the guest. */
+ uint16_t queue_selector;
+
+ /* RX worker thread plus the condvar used to wake it on queue notify. */
+ pthread_t io_rx_thread;
+ pthread_mutex_t io_rx_mutex;
+ pthread_cond_t io_rx_cond;
+
+ /* TX worker thread plus the condvar used to wake it on queue notify. */
+ pthread_t io_tx_thread;
+ pthread_mutex_t io_tx_mutex;
+ pthread_cond_t io_tx_cond;
+
+ /* Host-side tap device carrying the guest's frames. */
+ int tap_fd;
+ char tap_name[IFNAMSIZ];
+};
+
+/* Single global device instance: hard-coded MAC, link reported as up. */
+static struct net_device net_device = {
+ .mutex = PTHREAD_MUTEX_INITIALIZER,
+
+ .net_config = {
+  .mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
+  .status = VIRTIO_NET_S_LINK_UP,
+ },
+
+ /* Only VIRTIO_NET_F_MAC is advertised: no CSUM/GSO offloads. */
+ .host_features = 1UL << VIRTIO_NET_F_MAC,
+};
+
+/*
+ * RX worker: sleeps until the guest posts receive buffers, then reads
+ * frames from the tap fd into them and raises the guest IRQ per frame.
+ * 'p' is the struct kvm pointer passed at pthread_create() time.
+ */
+static void *virtio_net_rx_thread(void *p)
+{
+ struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
+ struct virt_queue *vq;
+ struct kvm *self;
+ uint16_t out, in;
+ uint16_t head;
+ int len;
+
+ self = p;
+ vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE];
+
+ while (1) {
+  /* Block until virtio_net_handle_callback() signals a queue notify. */
+  mutex_lock(&net_device.io_rx_mutex);
+  if (!virt_queue__available(vq))
+   pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex);
+  mutex_unlock(&net_device.io_rx_mutex);
+
+  while (virt_queue__available(vq)) {
+   head = virt_queue__get_iov(vq, iov, &out, &in, self);
+
+   /* No GSO/CSUM features are negotiated, so the virtio_net_hdr is
+    * never filled in; skip iov[0], which is assumed to cover exactly
+    * the header descriptor (NOTE(review): confirm guest buffer layout). */
+   /* NOTE(review): readv() return value is unchecked; on error len is -1
+    * and a bogus used length is reported to the guest. */
+   len = readv(net_device.tap_fd, iov + 1, in - 1);
+
+   /* The used length must still account for the (unwritten) header. */
+   virt_queue__set_used_elem(vq, head, sizeof(struct virtio_net_hdr) + len);
+
+   /* Interrupt the guest per frame, otherwise rx latency is huge. */
+   kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
+  }
+
+ }
+
+ /* Not reached: the loop above never exits. */
+ pthread_exit(NULL);
+ return NULL;
+
+}
+
+/*
+ * TX worker: sleeps until the guest queues outgoing frames, writes them
+ * to the tap fd, then raises the guest IRQ once per drained batch
+ * (unlike rx, which interrupts per frame).
+ * 'p' is the struct kvm pointer passed at pthread_create() time.
+ */
+static void *virtio_net_tx_thread(void *p)
+{
+ struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
+ struct virt_queue *vq;
+ struct kvm *self;
+ uint16_t out, in;
+ uint16_t head;
+ int len;
+
+ self = p;
+ vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE];
+
+ while (1) {
+  /* Block until virtio_net_handle_callback() signals a queue notify. */
+  mutex_lock(&net_device.io_tx_mutex);
+  if (!virt_queue__available(vq))
+   pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex);
+  mutex_unlock(&net_device.io_tx_mutex);
+
+  while (virt_queue__available(vq)) {
+   head = virt_queue__get_iov(vq, iov, &out, &in, self);
+   /* iov[0] is assumed to be the virtio_net_hdr descriptor; it is
+    * skipped since no offload features are negotiated.
+    * NOTE(review): writev() return value is unchecked; len may be -1. */
+   len = writev(net_device.tap_fd, iov + 1, out - 1);
+   virt_queue__set_used_elem(vq, head, len);
+  }
+
+  kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
+ }
+
+ /* Not reached: the loop above never exits. */
+ pthread_exit(NULL);
+ return NULL;
+
+}
+/*
+ * Handle a guest read from the device-specific config space
+ * (struct virtio_net_config: MAC address and link status).
+ * Only single-byte, single-count accesses are emulated.
+ * Returns false for access shapes or offsets we do not handle.
+ */
+static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count)
+{
+ uint8_t *config_space = (uint8_t *) &net_device.net_config;
+
+ if (size != 1 || count != 1)
+  return false;
+
+ /* Valid indexes are 0 .. sizeof(config) - 1, so '>=' (the original '>'
+  * let offset == sizeof through, reading one byte past the struct).
+  * Also fail the access instead of falling through to the OOB read. */
+ if ((offset - VIRTIO_PCI_CONFIG_NOMSI) >= sizeof(struct virtio_net_config)) {
+  error("config offset is too big: %li", offset - VIRTIO_PCI_CONFIG_NOMSI);
+  return false;
+ }
+
+ ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]);
+
+ return true;
+}
+
+/*
+ * Handle guest 'in' accesses on the device's PCI I/O BAR.
+ * Offsets below VIRTIO_PCI_CONFIG_NOMSI are the common virtio-pci
+ * registers; anything above is the device-specific config space.
+ * Returns false for registers that are write-only or unimplemented.
+ */
+static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
+{
+ unsigned long offset = port - IOPORT_VIRTIO_NET;
+ bool ret = true;
+
+ mutex_lock(&net_device.mutex);
+
+ switch (offset) {
+ case VIRTIO_PCI_HOST_FEATURES:
+  ioport__write32(data, net_device.host_features);
+  break;
+ case VIRTIO_PCI_GUEST_FEATURES:
+  /* Guest features register is write-only from the guest side. */
+  ret = false;
+  break;
+ case VIRTIO_PCI_QUEUE_PFN:
+  ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn);
+  break;
+ case VIRTIO_PCI_QUEUE_NUM:
+  ioport__write16(data, VIRTIO_NET_QUEUE_SIZE);
+  break;
+ case VIRTIO_PCI_QUEUE_SEL:
+ case VIRTIO_PCI_QUEUE_NOTIFY:
+  /* Write-only registers. */
+  ret = false;
+  break;
+ case VIRTIO_PCI_STATUS:
+  ioport__write8(data, net_device.status);
+  break;
+ case VIRTIO_PCI_ISR:
+  /* Reading ISR acknowledges the interrupt: report bit 0 set and
+   * deassert the IRQ line. */
+  ioport__write8(data, 0x1);
+  kvm__irq_line(self, VIRTIO_NET_IRQ, 0);
+  break;
+ case VIRTIO_MSI_CONFIG_VECTOR:
+  ioport__write16(data, net_device.config_vector);
+  break;
+ default:
+  /* Device-specific config space (MAC, link status). */
+  ret = virtio_net_pci_io_device_specific_in(data, offset, size, count);
+ };
+
+ mutex_unlock(&net_device.mutex);
+
+ return ret;
+}
+
+/*
+ * Queue-notify dispatch: wake the worker thread that services the
+ * notified queue. Unknown queue indexes are silently ignored.
+ */
+static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index)
+{
+ switch (queue_index) {
+ case VIRTIO_NET_TX_QUEUE:
+  mutex_lock(&net_device.io_tx_mutex);
+  pthread_cond_signal(&net_device.io_tx_cond);
+  mutex_unlock(&net_device.io_tx_mutex);
+  break;
+ case VIRTIO_NET_RX_QUEUE:
+  mutex_lock(&net_device.io_rx_mutex);
+  pthread_cond_signal(&net_device.io_rx_cond);
+  mutex_unlock(&net_device.io_rx_mutex);
+  break;
+ default:
+  break;
+ }
+}
+
+/*
+ * Handle guest 'out' accesses on the device's PCI I/O BAR: feature
+ * negotiation, queue setup, queue notification and status updates.
+ * Returns false for offsets that are not writable registers.
+ */
+static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
+{
+ unsigned long offset = port - IOPORT_VIRTIO_NET;
+ bool ret = true;
+
+ mutex_lock(&net_device.mutex);
+
+ switch (offset) {
+ case VIRTIO_PCI_GUEST_FEATURES:
+  net_device.guest_features = ioport__read32(data);
+  break;
+ case VIRTIO_PCI_QUEUE_PFN: {
+  struct virt_queue *queue;
+  void *p;
+
+  assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES);
+
+  /* The guest hands us the page frame number of the vring; map it
+   * into host address space (pfn << 12 == guest physical address,
+   * 4096-byte vring alignment per the legacy virtio-pci ABI). */
+  queue = &net_device.vqs[net_device.queue_selector];
+  queue->pfn = ioport__read32(data);
+  p = guest_flat_to_host(self, queue->pfn << 12);
+
+  vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096);
+
+  break;
+ }
+ case VIRTIO_PCI_QUEUE_SEL:
+  net_device.queue_selector = ioport__read16(data);
+  break;
+ case VIRTIO_PCI_QUEUE_NOTIFY: {
+  uint16_t queue_index;
+  queue_index = ioport__read16(data);
+  /* Wake the rx/tx worker servicing the notified queue. */
+  virtio_net_handle_callback(self, queue_index);
+  break;
+ }
+ case VIRTIO_PCI_STATUS:
+  net_device.status = ioport__read8(data);
+  break;
+ case VIRTIO_MSI_CONFIG_VECTOR:
+  /* NOTE(review): the value written by the guest is discarded and
+   * NO_VECTOR stored unconditionally — presumably because MSI is
+   * unsupported; confirm intent. */
+  net_device.config_vector = VIRTIO_MSI_NO_VECTOR;
+  break;
+ case VIRTIO_MSI_QUEUE_VECTOR:
+  /* MSI per-queue vectors unsupported: accept and ignore. */
+  break;
+ default:
+  ret = false;
+ };
+
+ mutex_unlock(&net_device.mutex);
+ return ret;
+}
+
+/* I/O port callbacks registered for the device's PIO BAR range. */
+static struct ioport_operations virtio_net_io_ops = {
+ .io_in = virtio_net_pci_io_in,
+ .io_out = virtio_net_pci_io_out,
+};
+
+/* Red Hat/Qumranet vendor ID and the virtio-net device/subsystem IDs. */
+#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4
+#define PCI_DEVICE_ID_VIRTIO_NET 0x1000
+#define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4
+#define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001
+
+/* PCI config-space header presented to the guest for this device. */
+static struct pci_device_header virtio_net_pci_device = {
+ .vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET,
+ .device_id = PCI_DEVICE_ID_VIRTIO_NET,
+ .header_type = PCI_HEADER_TYPE_NORMAL,
+ .revision_id = 0,
+ /* 0x020000: network / Ethernet controller class code. */
+ .class = 0x020000,
+ .subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
+ .subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET,
+ /* BAR 0 is an I/O-space BAR at the fixed virtio-net port base. */
+ .bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO,
+ .irq_pin = 3,
+ .irq_line = VIRTIO_NET_IRQ,
+};
+
+/*
+ * Open /dev/net/tun and create the host-side tap interface backing the
+ * guest NIC, then assign it a fixed IP address. Dies on fatal errors.
+ * Requires CAP_NET_ADMIN (typically root).
+ */
+static void virtio_net__tap_init(void)
+{
+ struct ifreq ifr;
+
+ net_device.tap_fd = open("/dev/net/tun", O_RDWR);
+ if (net_device.tap_fd < 0)
+  die("Unable to open /dev/net/tun\n");
+
+ memset(&ifr, 0, sizeof(ifr));
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+ if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0)
+  die("Config tap device error. Are you root?");
+
+ /* strncpy() does not guarantee NUL-termination when the source fills
+  * the buffer; terminate explicitly. */
+ strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name) - 1);
+ net_device.tap_name[sizeof(net_device.tap_name) - 1] = '\0';
+
+ ioctl(net_device.tap_fd, TUNSETNOCSUM, 1);
+
+ /*FIXME: Remove this after user can specify ip address and netmask*/
+ /* system() returns the command's exit status; '< 0' only caught
+  * fork/exec failure and missed ifconfig itself failing.
+  * NOTE(review): "tap0" is hard-coded here while the kernel chose the
+  * name in ifr.ifr_name — presumably always tap0; confirm. */
+ if (system("ifconfig tap0 192.168.33.2") != 0)
+  warning("Can not set ip address on tap0");
+}
+
+/*
+ * Initialize the rx/tx synchronization primitives and start both
+ * worker threads, passing them the kvm instance.
+ *
+ * Fixes a copy-paste bug: the original initialized io_rx_mutex and
+ * io_tx_cond twice, leaving io_tx_mutex and io_rx_cond uninitialized.
+ */
+static void virtio_net__io_thread_init(struct kvm *self)
+{
+ pthread_mutex_init(&net_device.io_rx_mutex, NULL);
+ pthread_cond_init(&net_device.io_rx_cond, NULL);
+
+ pthread_mutex_init(&net_device.io_tx_mutex, NULL);
+ pthread_cond_init(&net_device.io_tx_cond, NULL);
+
+ pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self);
+ pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self);
+}
+
+/*
+ * Public entry point: register the virtio-net PCI device and its I/O
+ * port range, create the host tap interface, and start the rx/tx
+ * worker threads.
+ */
+void virtio_net__init(struct kvm *self)
+{
+ pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM);
+ ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE);
+
+ virtio_net__tap_init();
+ virtio_net__io_thread_init(self);
+}
--
1.7.4.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 11:48 [PATCH] kvm tools: Implement virtio network device Asias He
@ 2011-04-13 11:51 ` Pekka Enberg
2011-04-13 12:00 ` Asias He
0 siblings, 1 reply; 8+ messages in thread
From: Pekka Enberg @ 2011-04-13 11:51 UTC (permalink / raw)
To: Asias He; +Cc: Cyrill Gorcunov, Ingo Molnar, kvm
On 4/13/11 2:48 PM, Asias He wrote:
> This patch implement virtio network device.
> Use '-n virtio or --network=virtio' to enable it.
>
> The current implementation uses tap which needs root privileges to create a
> virtual network device (tap0) on host side. Actually, what we need is
> CAP_NET_ADMIN.
>
> The host side tap0 is set to 192.168.33.2/24.
> You need to configure the guest side eth0 to any ip address in
> 192.168.33.0/24.
>
> Here are some scp performance test for differenct implementations:
> None of rx and tx as thread:
> guest to host 3.2MB/s
> host to guest 3.1MB/s
>
> Only rx as thread:
> guest to host 14.7MB/s
> host to guest 33.4MB/s
>
> Both rx and tx as thread(This patch works this way):
> guest to host 19.8MB/s
> host to guest 32.5MB/s
>
> Signed-off-by: Asias He<asias.hejun@gmail.com>
This is already in master. Thanks!
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 11:51 ` Pekka Enberg
@ 2011-04-13 12:00 ` Asias He
2011-04-13 13:02 ` [transparent networking] " Ingo Molnar
0 siblings, 1 reply; 8+ messages in thread
From: Asias He @ 2011-04-13 12:00 UTC (permalink / raw)
To: Pekka Enberg; +Cc: Cyrill Gorcunov, Ingo Molnar, kvm
On 04/13/2011 07:51 PM, Pekka Enberg wrote:
> On 4/13/11 2:48 PM, Asias He wrote:
>> This patch implement virtio network device.
>> Use '-n virtio or --network=virtio' to enable it.
>>
>> The current implementation uses tap which needs root privileges to
>> create a
>> virtual network device (tap0) on host side. Actually, what we need is
>> CAP_NET_ADMIN.
>>
>> The host side tap0 is set to 192.168.33.2/24.
>> You need to configure the guest side eth0 to any ip address in
>> 192.168.33.0/24.
>>
>> Here are some scp performance test for differenct implementations:
>> None of rx and tx as thread:
>> guest to host 3.2MB/s
>> host to guest 3.1MB/s
>>
>> Only rx as thread:
>> guest to host 14.7MB/s
>> host to guest 33.4MB/s
>>
>> Both rx and tx as thread(This patch works this way):
>> guest to host 19.8MB/s
>> host to guest 32.5MB/s
>>
>> Signed-off-by: Asias He<asias.hejun@gmail.com>
>
> This is already in master. Thanks!
>
Ingo suggested to CC the updated version of this patch to kvm list. So I
am posting this patch again.
--
Best Regards,
Asias He
^ permalink raw reply [flat|nested] 8+ messages in thread
* [transparent networking] Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 12:00 ` Asias He
@ 2011-04-13 13:02 ` Ingo Molnar
2011-04-13 13:33 ` Avi Kivity
2011-04-13 16:21 ` Stefan Hajnoczi
0 siblings, 2 replies; 8+ messages in thread
From: Ingo Molnar @ 2011-04-13 13:02 UTC (permalink / raw)
To: Asias He; +Cc: Pekka Enberg, Cyrill Gorcunov, kvm
* Asias He <asias.hejun@gmail.com> wrote:
> >> Here are some scp performance test for differenct implementations:
> >> None of rx and tx as thread:
> >> guest to host 3.2MB/s
> >> host to guest 3.1MB/s
> >>
> >> Only rx as thread:
> >> guest to host 14.7MB/s
> >> host to guest 33.4MB/s
> >>
> >> Both rx and tx as thread(This patch works this way):
> >> guest to host 19.8MB/s
> >> host to guest 32.5MB/s
> >>
> >> Signed-off-by: Asias He<asias.hejun@gmail.com>
> >
> > This is already in master. Thanks!
> >
>
> Ingo suggested to CC the updated version of this patch to kvm list. So I
> am posting this patch again.
Thanks Asias, cool stuff.
Maybe other KVM developers want to chime in about how to best implement
transparent (non-TAP-using) guest-side networking.
The best approach would be to not go down as low as the IP/Ethernet packeting
level (it's unnecessary protocol overhead), but to implement some sort of
streaming, virtio based TCP connection proxying support.
Strictly talking the guest does not need ICMP packets to have working Internet
connectivity - only passing/tunneling through TCP sockets would be enough. The
following highlevel ops are needed:
- connect/shutdown/close
- send/receive
- poll
And would be passed through to the host side and mirrored there into real
connect/shutdown TCP socket ops and into send/receive ops.
The guest OS does not need to be 'aware' of this in any way, as long as the
bzImage has this magic guest tunneling support included.
Obviously, such a highlevel approach would be much faster as well than any
packet level virtual networking approach.
Does something like this exist upstream, or do we have to implement it?
Thanks,
Ingo
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [transparent networking] Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 13:02 ` [transparent networking] " Ingo Molnar
@ 2011-04-13 13:33 ` Avi Kivity
2011-04-13 13:38 ` Avi Kivity
2011-04-13 13:39 ` Asias He
2011-04-13 16:21 ` Stefan Hajnoczi
1 sibling, 2 replies; 8+ messages in thread
From: Avi Kivity @ 2011-04-13 13:33 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Asias He, Pekka Enberg, Cyrill Gorcunov, kvm
On 04/13/2011 04:02 PM, Ingo Molnar wrote:
> * Asias He<asias.hejun@gmail.com> wrote:
>
> > >> Here are some scp performance test for differenct implementations:
> > >> None of rx and tx as thread:
> > >> guest to host 3.2MB/s
> > >> host to guest 3.1MB/s
> > >>
> > >> Only rx as thread:
> > >> guest to host 14.7MB/s
> > >> host to guest 33.4MB/s
> > >>
> > >> Both rx and tx as thread(This patch works this way):
> > >> guest to host 19.8MB/s
> > >> host to guest 32.5MB/s
> > >>
> > >> Signed-off-by: Asias He<asias.hejun@gmail.com>
> > >
> > > This is already in master. Thanks!
> > >
> >
> > Ingo suggested to CC the updated version of this patch to kvm list. So I
> > am posting this patch again.
>
> Thanks Asias, cool stuff.
>
> Maybe other KVM developers want to chime in about how to best implement
> transparent (non-TAP-using) guest-side networking.
>
> The best approach would be to not go down as low as the IP/Ethernet packeting
> level (it's unnecessary protocol overhead), but to implement some sort of
> streaming, virtio based TCP connection proxying support.
>
> Strictly talking the guest does not need ICMP packets to have working Internet
> connectivity - only passing/tunneling through TCP sockets would be enough. The
> following highlevel ops are needed:
>
> - connect/shutdown/close
> - send/receive
> - poll
>
> And would be passed through to the host side and mirrored there into real
> connect/shutdown TCP socket ops and into send/receive ops.
>
> The guest OS does not need to be 'aware' of this in any way, as long as the
> bzImage has this magic guest tunneling support included.
>
> Obviously, such a highlevel approach would be much faster as well than any
> packet level virtual networking approach.
>
> Does something like this exist upstream, or do we have to implement it?
>
macvtap does non-privileged setupless networking.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [transparent networking] Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 13:33 ` Avi Kivity
@ 2011-04-13 13:38 ` Avi Kivity
2011-04-13 13:39 ` Asias He
1 sibling, 0 replies; 8+ messages in thread
From: Avi Kivity @ 2011-04-13 13:38 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Asias He, Pekka Enberg, Cyrill Gorcunov, kvm
On 04/13/2011 04:33 PM, Avi Kivity wrote:
> On 04/13/2011 04:02 PM, Ingo Molnar wrote:
>> * Asias He<asias.hejun@gmail.com> wrote:
>>
>> > >> Here are some scp performance test for differenct implementations:
>> > >> None of rx and tx as thread:
>> > >> guest to host 3.2MB/s
>> > >> host to guest 3.1MB/s
>> > >>
>> > >> Only rx as thread:
>> > >> guest to host 14.7MB/s
>> > >> host to guest 33.4MB/s
>> > >>
>> > >> Both rx and tx as thread(This patch works this way):
>> > >> guest to host 19.8MB/s
>> > >> host to guest 32.5MB/s
>> > >>
>> > >> Signed-off-by: Asias He<asias.hejun@gmail.com>
>> > >
>> > > This is already in master. Thanks!
>> > >
>> >
>> > Ingo suggested to CC the updated version of this patch to kvm
>> list. So I
>> > am posting this patch again.
>>
>> Thanks Asias, cool stuff.
>>
>> Maybe other KVM developers want to chime in about how to best implement
>> transparent (non-TAP-using) guest-side networking.
>>
>> The best approach would be to not go down as low as the IP/Ethernet
>> packeting
>> level (it's unnecessary protocol overhead), but to implement some
>> sort of
>> streaming, virtio based TCP connection proxying support.
>>
>> Strictly talking the guest does not need ICMP packets to have working
>> Internet
>> connectivity - only passing/tunneling through TCP sockets would be
>> enough. The
>> following highlevel ops are needed:
>>
>> - connect/shutdown/close
>> - send/receive
>> - poll
>>
>> And would be passed through to the host side and mirrored there into
>> real
>> connect/shutdown TCP socket ops and into send/receive ops.
>>
>> The guest OS does not need to be 'aware' of this in any way, as long
>> as the
>> bzImage has this magic guest tunneling support included.
>>
>> Obviously, such a highlevel approach would be much faster as well
>> than any
>> packet level virtual networking approach.
>>
>> Does something like this exist upstream, or do we have to implement it?
>>
>
> macvtap does non-privileged setupless networking.
>
But this doesn't really answer your message. No, there is no tcp-level
virtio device. However, because of GSO/GRO, I don't think there is a
huge win to be gained by bypassing the lower layers. If you want to
send a megabyte's worth of data into a tcp stream, you prepend a header
and post it to virtio-net, and this goes all the way down to the real
device.
I'm not sure tcp-offload like you propose would pass netdev@. Similar
approaches for real hardware were rejected since they would bypass the
tcp stack. Things like netfilter would no longer work.
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [transparent networking] Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 13:33 ` Avi Kivity
2011-04-13 13:38 ` Avi Kivity
@ 2011-04-13 13:39 ` Asias He
1 sibling, 0 replies; 8+ messages in thread
From: Asias He @ 2011-04-13 13:39 UTC (permalink / raw)
To: Avi Kivity; +Cc: Ingo Molnar, Pekka Enberg, Cyrill Gorcunov, kvm
On 04/13/2011 09:33 PM, Avi Kivity wrote:
> On 04/13/2011 04:02 PM, Ingo Molnar wrote:
>> * Asias He<asias.hejun@gmail.com> wrote:
>>
>> > >> Here are some scp performance test for differenct implementations:
>> > >> None of rx and tx as thread:
>> > >> guest to host 3.2MB/s
>> > >> host to guest 3.1MB/s
>> > >>
>> > >> Only rx as thread:
>> > >> guest to host 14.7MB/s
>> > >> host to guest 33.4MB/s
>> > >>
>> > >> Both rx and tx as thread(This patch works this way):
>> > >> guest to host 19.8MB/s
>> > >> host to guest 32.5MB/s
>> > >>
>> > >> Signed-off-by: Asias He<asias.hejun@gmail.com>
>> > >
>> > > This is already in master. Thanks!
>> > >
>> >
>> > Ingo suggested to CC the updated version of this patch to kvm list.
>> So I
>> > am posting this patch again.
>>
>> Thanks Asias, cool stuff.
>>
>> Maybe other KVM developers want to chime in about how to best implement
>> transparent (non-TAP-using) guest-side networking.
>>
>> The best approach would be to not go down as low as the IP/Ethernet
>> packeting
>> level (it's unnecessary protocol overhead), but to implement some sort of
>> streaming, virtio based TCP connection proxying support.
>>
>> Strictly talking the guest does not need ICMP packets to have working
>> Internet
>> connectivity - only passing/tunneling through TCP sockets would be
>> enough. The
>> following highlevel ops are needed:
>>
>> - connect/shutdown/close
>> - send/receive
>> - poll
>>
>> And would be passed through to the host side and mirrored there into real
>> connect/shutdown TCP socket ops and into send/receive ops.
>>
>> The guest OS does not need to be 'aware' of this in any way, as long
>> as the
>> bzImage has this magic guest tunneling support included.
>>
>> Obviously, such a highlevel approach would be much faster as well than
>> any
>> packet level virtual networking approach.
>>
>> Does something like this exist upstream, or do we have to implement it?
>>
>
> macvtap does non-privileged setupless networking.
Great! Thanks Avi!
--
Best Regards,
Asias He
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [transparent networking] Re: [PATCH] kvm tools: Implement virtio network device
2011-04-13 13:02 ` [transparent networking] " Ingo Molnar
2011-04-13 13:33 ` Avi Kivity
@ 2011-04-13 16:21 ` Stefan Hajnoczi
1 sibling, 0 replies; 8+ messages in thread
From: Stefan Hajnoczi @ 2011-04-13 16:21 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Asias He, Pekka Enberg, Cyrill Gorcunov, kvm
On Wed, Apr 13, 2011 at 2:02 PM, Ingo Molnar <mingo@elte.hu> wrote:
> Strictly talking the guest does not need ICMP packets to have working Internet
> connectivity - only passing/tunneling through TCP sockets would be enough.
Don't forget UDP for DNS.
Stefan
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2011-04-13 16:21 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-04-13 11:48 [PATCH] kvm tools: Implement virtio network device Asias He
2011-04-13 11:51 ` Pekka Enberg
2011-04-13 12:00 ` Asias He
2011-04-13 13:02 ` [transparent networking] " Ingo Molnar
2011-04-13 13:33 ` Avi Kivity
2011-04-13 13:38 ` Avi Kivity
2011-04-13 13:39 ` Asias He
2011-04-13 16:21 ` Stefan Hajnoczi
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox