public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Mark McLoughlin <markmc@redhat.com>
To: kvm@vger.kernel.org
Cc: Herbert Xu <herbert@gondor.apana.org.au>,
	Rusty Russell <rusty@rustcorp.com.au>,
	Mark McLoughlin <markmc@redhat.com>
Subject: [PATCH 6/9] kvm: qemu: Add support for partial csums and GSO
Date: Thu, 24 Jul 2008 12:46:16 +0100	[thread overview]
Message-ID: <1216899979-32532-7-git-send-email-markmc@redhat.com> (raw)
In-Reply-To: <1216899979-32532-6-git-send-email-markmc@redhat.com>

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
 qemu/hw/virtio-net.c |   86 +++++++++++++++++++++++++++++++++++++++++---------
 qemu/net.h           |    5 +++
 qemu/vl.c            |   73 +++++++++++++++++++++++++++++++++++++++---
 3 files changed, 144 insertions(+), 20 deletions(-)

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 419a2d7..81282c4 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -22,9 +22,18 @@
 #define VIRTIO_ID_NET	1
 
 /* The feature bitmap for virtio net */
-#define VIRTIO_NET_F_NO_CSUM	0
-#define VIRTIO_NET_F_MAC	5
-#define VIRTIO_NET_F_GS0	6
+#define VIRTIO_NET_F_CSUM	0	/* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM	1	/* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_MAC	5	/* Host has given MAC address. */
+#define VIRTIO_NET_F_GSO	6	/* Host handles pkts w/ any GSO type */
+#define VIRTIO_NET_F_GUEST_TSO4	7	/* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6	8	/* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN	9	/* Guest can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO	10	/* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4	11	/* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6	12	/* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN	13	/* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO	14	/* Host can handle UFO in. */
 
 #define TX_TIMER_INTERVAL (150000) /* 150 us */
 
@@ -42,8 +51,6 @@ struct virtio_net_hdr
     uint8_t flags;
 #define VIRTIO_NET_HDR_GSO_NONE		0	// Not a GSO frame
 #define VIRTIO_NET_HDR_GSO_TCPV4	1	// GSO frame, IPv4 TCP (TSO)
-/* FIXME: Do we need this?  If they said they can handle ECN, do they care? */
-#define VIRTIO_NET_HDR_GSO_TCPV4_ECN	2	// GSO frame, IPv4 TCP w/ ECN
 #define VIRTIO_NET_HDR_GSO_UDP		3	// GSO frame, IPv4 UDP (UFO)
 #define VIRTIO_NET_HDR_GSO_TCPV6	4	// GSO frame, IPv6 TCP
 #define VIRTIO_NET_HDR_GSO_ECN		0x80	// TCP has ECN set
@@ -85,7 +92,38 @@ static void virtio_net_update_config(VirtIODevice *vdev, uint8_t *config)
 
 static uint32_t virtio_net_get_features(VirtIODevice *vdev)
 {
-    return (1 << VIRTIO_NET_F_MAC);
+    VirtIONet *n = to_virtio_net(vdev);
+    VLANClientState *host = n->vc->vlan->first_client;
+    uint32_t features = (1 << VIRTIO_NET_F_MAC);
+
+    if (tap_has_offload(host)) {
+	features |= (1 << VIRTIO_NET_F_CSUM);
+	features |= (1 << VIRTIO_NET_F_GUEST_CSUM);
+	features |= (1 << VIRTIO_NET_F_GUEST_TSO4);
+	features |= (1 << VIRTIO_NET_F_GUEST_TSO6);
+	features |= (1 << VIRTIO_NET_F_GUEST_ECN);
+	features |= (1 << VIRTIO_NET_F_HOST_TSO4);
+	features |= (1 << VIRTIO_NET_F_HOST_TSO6);
+	features |= (1 << VIRTIO_NET_F_HOST_ECN);
+	/* Kernel can't actually handle UFO in software currently. */
+    }
+
+    return features;
+}
+
+static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
+{
+    VirtIONet *n = to_virtio_net(vdev);
+    VLANClientState *host = n->vc->vlan->first_client;
+
+    if (!tap_has_offload(host) || !host->set_offload)
+	return;
+
+    host->set_offload(host,
+		      (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
+		      (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
+		      (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
+		      (features >> VIRTIO_NET_F_GUEST_ECN)  & 1);
 }
 
 /* RX */
@@ -121,6 +159,7 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
     VirtQueueElement elem;
     struct virtio_net_hdr *hdr;
     int offset, i;
+    int total;
 
     if (virtqueue_pop(n->rx_vq, &elem) == 0)
 	return;
@@ -134,18 +173,26 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
     hdr->flags = 0;
     hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
 
-    /* copy in packet.  ugh */
     offset = 0;
+    total = sizeof(*hdr);
+
+    if (tap_has_offload(n->vc->vlan->first_client)) {
+	memcpy(hdr, buf, sizeof(*hdr));
+	offset += total;
+    }
+
+    /* copy in packet.  ugh */
     i = 1;
     while (offset < size && i < elem.in_num) {
 	int len = MIN(elem.in_sg[i].iov_len, size - offset);
 	memcpy(elem.in_sg[i].iov_base, buf + offset, len);
 	offset += len;
+	total += len;
 	i++;
     }
 
     /* signal other side */
-    virtqueue_push(n->rx_vq, &elem, sizeof(*hdr) + offset);
+    virtqueue_push(n->rx_vq, &elem, total);
     virtio_notify(&n->vdev, n->rx_vq);
 }
 
@@ -153,23 +200,31 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
 static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
 {
     VirtQueueElement elem;
+    int has_offload = tap_has_offload(n->vc->vlan->first_client);
 
     if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
         return;
 
     while (virtqueue_pop(vq, &elem)) {
 	ssize_t len = 0;
+	unsigned int out_num = elem.out_num;
+	struct iovec *out_sg = &elem.out_sg[0];
+
+	if (out_num < 1 || out_sg->iov_len != sizeof(struct virtio_net_hdr)) {
+	    fprintf(stderr, "virtio-net header not in first element\n");
+	    exit(1);
+	}
 
-	if (elem.out_num < 1 ||
-	    elem.out_sg[0].iov_len != sizeof(struct virtio_net_hdr)) {
-		fprintf(stderr, "virtio-net header not in first element\n");
-		exit(1);
+	/* strip the virtio-net header unless the tap accepts it (IFF_VNET_HDR) */
+	if (!has_offload) {
+	    out_num--;
+	    out_sg++;
+	    len += sizeof(struct virtio_net_hdr);
 	}
 
-	/* ignore the header for now */
-	len = qemu_sendv_packet(n->vc, &elem.out_sg[1], elem.out_num - 1);
+	len += qemu_sendv_packet(n->vc, out_sg, out_num);
 
-	virtqueue_push(vq, &elem, sizeof(struct virtio_net_hdr) + len);
+	virtqueue_push(vq, &elem, len);
 	virtio_notify(&n->vdev, vq);
     }
 }
@@ -249,6 +304,7 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
 
     n->vdev.update_config = virtio_net_update_config;
     n->vdev.get_features = virtio_net_get_features;
+    n->vdev.set_features = virtio_net_set_features;
     n->rx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_rx);
     n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx);
     memcpy(n->mac, nd->macaddr, 6);
diff --git a/qemu/net.h b/qemu/net.h
index e8ee325..6cfd8ce 100644
--- a/qemu/net.h
+++ b/qemu/net.h
@@ -9,12 +9,15 @@ typedef ssize_t (IOReadvHandler)(void *, const struct iovec *, int);
 
 typedef struct VLANClientState VLANClientState;
 
+typedef void (SetOffload)(VLANClientState *, int, int, int, int);
+
 struct VLANClientState {
     IOReadHandler *fd_read;
     IOReadvHandler *fd_readv;
     /* Packets may still be sent if this returns zero.  It's used to
        rate-limit the slirp code.  */
     IOCanRWHandler *fd_can_read;
+    SetOffload *set_offload;
     void *opaque;
     struct VLANClientState *next;
     struct VLANState *vlan;
@@ -42,6 +45,8 @@ void qemu_handler_true(void *opaque);
 
 void do_info_network(void);
 
+int tap_has_offload(void *opaque);
+
 int net_client_init(const char *str);
 void net_client_uninit(NICInfo *nd);
 
diff --git a/qemu/vl.c b/qemu/vl.c
index b7d3397..efdaafd 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -4186,12 +4186,24 @@ void do_info_slirp(void)
 
 #if !defined(_WIN32)
 
+#ifndef IFF_VNET_HDR
+#define TAP_BUFSIZE 4096
+#else
+#include <linux/virtio_net.h>
+#define ETH_HLEN 14
+#define ETH_DATA_LEN 1500
+#define MAX_PACKET_LEN (ETH_HLEN + ETH_DATA_LEN)
+#define MAX_SKB_FRAGS ((65536/TARGET_PAGE_SIZE) + 2)
+#define TAP_BUFSIZE (sizeof(struct virtio_net_hdr) + MAX_PACKET_LEN + (MAX_SKB_FRAGS*TARGET_PAGE_SIZE))
+#endif
+
 typedef struct TAPState {
     VLANClientState *vc;
     int fd;
     char down_script[1024];
-    char buf[4096];
+    char buf[TAP_BUFSIZE];
     int size;
+    int offload;
 } TAPState;
 
 static void tap_receive(void *opaque, const uint8_t *buf, int size)
@@ -4286,6 +4298,37 @@ static void tap_send(void *opaque)
     } while (s->size > 0);
 }
 
+int tap_has_offload(void *opaque)
+{
+    VLANClientState *vc = opaque;
+    TAPState *ts = vc->opaque;
+
+    return ts ? ts->offload : 0;
+}
+
+#ifdef TUNSETOFFLOAD
+static void tap_set_offload(VLANClientState *vc, int csum, int tso4, int tso6,
+			    int ecn)
+{
+    TAPState *s = vc->opaque;
+    unsigned int offload = 0;
+
+    if (csum) {
+	offload |= TUN_F_CSUM;
+	if (tso4)
+	    offload |= TUN_F_TSO4;
+	if (tso6)
+	    offload |= TUN_F_TSO6;
+	if ((tso4 || tso6) && ecn)
+	    offload |= TUN_F_TSO_ECN;
+    }
+
+    if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0)
+	fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
+		strerror(errno));
+}
+#endif /* TUNSETOFFLOAD */
+
 /* fd support */
 
 static TAPState *net_tap_fd_init(VLANState *vlan, int fd)
@@ -4298,13 +4341,16 @@ static TAPState *net_tap_fd_init(VLANState *vlan, int fd)
     s->fd = fd;
     s->vc = qemu_new_vlan_client(vlan, tap_receive, NULL, s);
     s->vc->fd_readv = tap_readv;
+#ifdef TUNSETOFFLOAD
+    s->vc->set_offload = tap_set_offload;
+#endif
     qemu_set_fd_handler2(s->fd, tap_can_send, tap_send, NULL, s);
     snprintf(s->vc->info_str, sizeof(s->vc->info_str), "tap: fd=%d", fd);
     return s;
 }
 
 #if defined (_BSD) || defined (__FreeBSD_kernel__)
-static int tap_open(char *ifname, int ifname_size)
+static int tap_open(char *ifname, int ifname_size, int *offload)
 {
     int fd;
     char *dev;
@@ -4446,7 +4492,7 @@ int tap_alloc(char *dev)
     return tap_fd;
 }
 
-static int tap_open(char *ifname, int ifname_size)
+static int tap_open(char *ifname, int ifname_size, int *offload)
 {
     char  dev[10]="";
     int fd;
@@ -4459,18 +4505,31 @@ static int tap_open(char *ifname, int ifname_size)
     return fd;
 }
 #else
-static int tap_open(char *ifname, int ifname_size)
+static int tap_open(char *ifname, int ifname_size, int *offload)
 {
     struct ifreq ifr;
     int fd, ret;
+    unsigned int features;
 
     TFR(fd = open("/dev/net/tun", O_RDWR));
     if (fd < 0) {
         fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n");
         return -1;
     }
+
+    if (ioctl(fd, TUNGETFEATURES, &features))
+	features = IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE;
+
     memset(&ifr, 0, sizeof(ifr));
     ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+#ifdef IFF_VNET_HDR
+    if (features & IFF_VNET_HDR) {
+	*offload = 1;
+	ifr.ifr_flags |= IFF_VNET_HDR;
+    }
+#endif
+
     if (ifname[0] != '\0')
         pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
     else
@@ -4528,13 +4587,15 @@ static int net_tap_init(VLANState *vlan, const char *ifname1,
 {
     TAPState *s;
     int fd;
+    int offload;
     char ifname[128];
 
     if (ifname1 != NULL)
         pstrcpy(ifname, sizeof(ifname), ifname1);
     else
         ifname[0] = '\0';
-    TFR(fd = tap_open(ifname, sizeof(ifname)));
+    offload = 0;
+    TFR(fd = tap_open(ifname, sizeof(ifname), &offload));
     if (fd < 0)
         return -1;
 
@@ -4547,6 +4608,8 @@ static int net_tap_init(VLANState *vlan, const char *ifname1,
     s = net_tap_fd_init(vlan, fd);
     if (!s)
         return -1;
+
+    s->offload = offload;
     snprintf(s->vc->info_str, sizeof(s->vc->info_str),
              "tap: ifname=%s setup_script=%s", ifname, setup_script);
     if (down_script && strcmp(down_script, "no"))
-- 
1.5.4.1


  reply	other threads:[~2008-07-24 11:46 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-07-24 11:46 [PATCH 0/9][RFC] KVM virtio_net performance Mark McLoughlin
2008-07-24 11:46 ` [PATCH 1/9] kvm: qemu: Set MIN_TIMER_REARM_US to 150us Mark McLoughlin
2008-07-24 11:46   ` [PATCH 2/9] kvm: qemu: Fix virtio_net tx timer Mark McLoughlin
2008-07-24 11:46     ` [PATCH 3/9] kvm: qemu: Remove virtio_net tx ring-full heuristic Mark McLoughlin
2008-07-24 11:46       ` [PATCH 4/9] kvm: qemu: Add VIRTIO_F_NOTIFY_ON_EMPTY Mark McLoughlin
2008-07-24 11:46         ` [PATCH 5/9] kvm: qemu: Disable recv notifications until avail buffers exhausted Mark McLoughlin
2008-07-24 11:46           ` Mark McLoughlin [this message]
2008-07-24 11:46             ` [PATCH 7/9] kvm: qemu: Increase size of virtio_net rings Mark McLoughlin
2008-07-24 11:46               ` [PATCH 8/9] kvm: qemu: Drop the mutex while reading from tapfd Mark McLoughlin
2008-07-24 11:46                 ` [PATCH 9/9] kvm: qemu: Eliminate extra virtio_net copy Mark McLoughlin
2008-07-24 23:33                 ` [PATCH 8/9] kvm: qemu: Drop the mutex while reading from tapfd Dor Laor
2008-07-25 17:25                   ` Mark McLoughlin
2008-07-24 23:22       ` [PATCH 3/9] kvm: qemu: Remove virtio_net tx ring-full heuristic Dor Laor
2008-07-25  0:30         ` Rusty Russell
2008-07-25 17:30           ` Mark McLoughlin
2008-07-25 17:23         ` Mark McLoughlin
2008-07-24 23:56       ` Dor Laor
2008-07-26  9:48     ` [PATCH 2/9] kvm: qemu: Fix virtio_net tx timer Avi Kivity
2008-07-26 12:08       ` Mark McLoughlin
2008-07-24 11:55 ` [PATCH 0/9][RFC] KVM virtio_net performance Herbert Xu
2008-07-24 16:53 ` Mark McLoughlin
2008-07-24 18:29   ` Anthony Liguori
2008-07-25 16:36     ` Mark McLoughlin
2008-07-24 20:56 ` Anthony Liguori
2008-07-25 17:17   ` Mark McLoughlin
2008-07-25 21:29     ` Dor Laor
2008-07-26 19:09   ` Bill Davidsen
2008-07-27  7:52     ` Avi Kivity
2008-07-27 12:52       ` Bill Davidsen
2008-07-27 13:17       ` Bill Davidsen
2008-07-28  6:42         ` Mark McLoughlin
2008-07-26  9:45 ` Avi Kivity
2008-07-27  6:48   ` Rusty Russell
2008-07-27  6:48   ` Rusty Russell
2008-08-11 19:56   ` Mark McLoughlin
2008-08-12 13:35     ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1216899979-32532-7-git-send-email-markmc@redhat.com \
    --to=markmc@redhat.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=kvm@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.