* [0/6] [NET]: virtio SG/TSO patches
@ 2008-04-18 3:12 Herbert Xu
2008-04-18 3:14 ` [1/6] [TUN]: Add GSO support Herbert Xu
0 siblings, 1 reply; 19+ messages in thread
From: Herbert Xu @ 2008-04-18 3:12 UTC (permalink / raw)
To: virtualization
Hi:
Here are the patches I used for testing KVM with virtio-net using
TSO. There are three patches for the tun device which are basically
Rusty's patches with the mmap turned into copying (for correctness).
Two patches are for the virtio-net frontend, one required to support
receiving SG/TSO, and the other useful for testing SG per se. The
other patch is to the KVM backend to make all this work. It isn't
the prettiest or the smartest solution but it was the easiest :)
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 19+ messages in thread
* [1/6] [TUN]: Add GSO support
2008-04-18 3:12 [0/6] [NET]: virtio SG/TSO patches Herbert Xu
@ 2008-04-18 3:14 ` Herbert Xu
2008-04-18 3:15 ` [2/6] [TUN]: Add GSO detection Herbert Xu
0 siblings, 1 reply; 19+ messages in thread
From: Herbert Xu @ 2008-04-18 3:14 UTC (permalink / raw)
To: virtualization
This is Rusty's GSO patch for the tun device driver. Please
see his posting for the changelog.
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7b816a0..34a03ec 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -62,6 +62,7 @@
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/crc32.h>
+#include <linux/virtio_net.h>
#include <net/net_namespace.h>
#include <asm/system.h>
@@ -238,35 +239,188 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
return mask;
}
+static struct sk_buff *copy_user_skb(size_t align, struct iovec *iv, size_t len)
+{
+ struct sk_buff *skb;
+
+ if (!(skb = alloc_skb(len + align, GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+ if (align)
+ skb_reserve(skb, align);
+
+ if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
+ kfree_skb(skb);
+ return ERR_PTR(-EFAULT);
+ }
+ return skb;
+}
+
+/* This will fail if they give us a crazy iovec, but that's their own fault. */
+static int get_user_skb_frags(const struct iovec *iv, size_t count,
+ struct skb_frag_struct *f)
+{
+ unsigned int i, j, num_pg = 0;
+ int err;
+ struct page *pages[MAX_SKB_FRAGS];
+
+ down_read(¤t->mm->mmap_sem);
+ for (i = 0; i < count; i++) {
+ int n, npages;
+ unsigned long base, len;
+ base = (unsigned long)iv[i].iov_base;
+ len = (unsigned long)iv[i].iov_len;
+
+ if (len == 0)
+ continue;
+
+ /* How many pages will this take? */
+ npages = 1 + (base + len - 1)/PAGE_SIZE - base/PAGE_SIZE;
+ if (unlikely(num_pg + npages > MAX_SKB_FRAGS)) {
+ err = -ENOSPC;
+ goto fail;
+ }
+ n = get_user_pages(current, current->mm, base, npages,
+ 0, 0, pages, NULL);
+ if (unlikely(n < 0)) {
+ err = n;
+ goto fail;
+ }
+
+ /* Transfer pages to the frag array */
+ for (j = 0; j < n; j++) {
+ f[num_pg].page = pages[j];
+ if (j == 0) {
+ f[num_pg].page_offset = offset_in_page(base);
+ f[num_pg].size = min(len, PAGE_SIZE -
+ f[num_pg].page_offset);
+ } else {
+ f[num_pg].page_offset = 0;
+ f[num_pg].size = min(len, PAGE_SIZE);
+ }
+ len -= f[num_pg].size;
+ base += f[num_pg].size;
+ num_pg++;
+ }
+
+ if (unlikely(n != npages)) {
+ err = -EFAULT;
+ goto fail;
+ }
+ }
+ up_read(¤t->mm->mmap_sem);
+ return num_pg;
+
+fail:
+ for (i = 0; i < num_pg; i++)
+ put_page(f[i].page);
+ up_read(¤t->mm->mmap_sem);
+ return err;
+}
+
+
+static struct sk_buff *map_user_skb(const struct virtio_net_hdr *gso,
+ size_t align, struct iovec *iv,
+ size_t count, size_t len)
+{
+ struct sk_buff *skb;
+ struct skb_shared_info *sinfo;
+ int err;
+
+ if (!(skb = alloc_skb(gso->hdr_len + align, GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+ if (align)
+ skb_reserve(skb, align);
+
+ sinfo = skb_shinfo(skb);
+ sinfo->gso_size = gso->gso_size;
+ sinfo->gso_type = SKB_GSO_DODGY;
+ switch (gso->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ sinfo->gso_type |= SKB_GSO_TCPV4;
+ break;
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ sinfo->gso_type |= SKB_GSO_TCPV6;
+ break;
+ case VIRTIO_NET_HDR_GSO_UDP:
+ sinfo->gso_type |= SKB_GSO_UDP;
+ break;
+ default:
+ err = -EINVAL;
+ goto fail;
+ }
+
+ if (gso->gso_type & VIRTIO_NET_HDR_GSO_ECN)
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+
+ /* Copy in the header. */
+ if (memcpy_fromiovec(skb_put(skb, gso->hdr_len), iv, gso->hdr_len)) {
+ err = -EFAULT;
+ goto fail;
+ }
+
+ err = get_user_skb_frags(iv, count, sinfo->frags);
+ if (err < 0)
+ goto fail;
+
+ sinfo->nr_frags = err;
+ skb->len += len;
+ skb->data_len += len;
+
+ return skb;
+
+fail:
+ kfree_skb(skb);
+ return ERR_PTR(err);
+}
+
+static inline size_t iov_total(const struct iovec *iv, unsigned long count)
+{
+ unsigned long i;
+ size_t len;
+
+ for (i = 0, len = 0; i < count; i++)
+ len += iv[i].iov_len;
+
+ return len;
+}
+
/* Get packet from user space buffer */
-static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
+static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t num)
{
struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
+ struct virtio_net_hdr gso = { 0, VIRTIO_NET_HDR_GSO_NONE };
struct sk_buff *skb;
- size_t len = count, align = 0;
+ size_t tot_len = iov_total(iv, num);
+ size_t len = tot_len, align = 0;
if (!(tun->flags & TUN_NO_PI)) {
- if ((len -= sizeof(pi)) > count)
+ if ((len -= sizeof(pi)) > tot_len)
return -EINVAL;
if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
return -EFAULT;
}
+ if (tun->flags & TUN_VIRTIO_HDR) {
+ if ((len -= sizeof(gso)) > tot_len)
+ return -EINVAL;
+
+ if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
+ return -EFAULT;
+ }
if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV)
align = NET_IP_ALIGN;
- if (!(skb = alloc_skb(len + align, GFP_KERNEL))) {
- tun->dev->stats.rx_dropped++;
- return -ENOMEM;
- }
+ if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE)
+ skb = map_user_skb(&gso, align, iv, num, len);
+ else
+ skb = copy_user_skb(align, iv, len);
- if (align)
- skb_reserve(skb, align);
- if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
+ if (IS_ERR(skb)) {
tun->dev->stats.rx_dropped++;
- kfree_skb(skb);
- return -EFAULT;
+ return PTR_ERR(skb);
}
switch (tun->flags & TUN_TYPE_MASK) {
@@ -280,7 +434,13 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
break;
};
- if (tun->flags & TUN_NOCHECKSUM)
+ if (gso.flags & (1 << VIRTIO_NET_F_CSUM)) {
+ if (!skb_partial_csum_set(skb,gso.csum_start,gso.csum_offset)) {
+ tun->dev->stats.rx_dropped++;
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ } else if (tun->flags & TUN_NOCHECKSUM)
skb->ip_summed = CHECKSUM_UNNECESSARY;
netif_rx_ni(skb);
@@ -289,7 +449,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
tun->dev->stats.rx_packets++;
tun->dev->stats.rx_bytes += len;
- return count;
+ return tot_len;
}
static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
@@ -302,7 +462,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
- return tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count));
+ return tun_get_user(tun, (struct iovec *) iv, count);
}
/* Put packet to the user space buffer */
@@ -326,6 +486,42 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
return -EFAULT;
total += sizeof(pi);
}
+ if (tun->flags & TUN_VIRTIO_HDR) {
+ struct virtio_net_hdr gso;
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+ if (skb_is_gso(skb)) {
+ gso.hdr_len = skb_transport_header(skb) - skb->data;
+ gso.gso_size = sinfo->gso_size;
+ if (sinfo->gso_type & SKB_GSO_TCPV4)
+ gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ else if (sinfo->gso_type & SKB_GSO_TCPV6)
+ gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ else if (sinfo->gso_type & SKB_GSO_UDP)
+ gso.gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ else
+ BUG();
+ if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+ gso.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
+ } else
+ gso.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ gso.csum_start = skb->csum_start - skb_headroom(skb);
+ gso.csum_offset = skb->csum_offset;
+ } else {
+ gso.flags = 0;
+ gso.csum_offset = gso.csum_start = 0;
+ }
+
+ if ((len -= sizeof(gso)) < 0)
+ return -EINVAL;
+
+ if (memcpy_toiovec(iv, (void *)&gso, sizeof(gso)))
+ return -EFAULT;
+ total += sizeof(gso);
+ }
len = min_t(int, skb->len, len);
@@ -512,6 +708,17 @@ static int tun_set_iff(struct file *file, struct ifreq *ifr)
tun_net_init(dev);
+ /* Virtio header means we can handle csum & gso. */
+ if ((ifr->ifr_flags & (IFF_VIRTIO_HDR|IFF_RECV_CSUM)) ==
+ (IFF_VIRTIO_HDR|IFF_RECV_CSUM)) {
+ dev->features = NETIF_F_SG | NETIF_F_HW_CSUM |
+ NETIF_F_HIGHDMA | NETIF_F_FRAGLIST;
+
+ if (ifr->ifr_flags & IFF_RECV_GSO)
+ dev->features |= NETIF_F_TSO | NETIF_F_UFO |
+ NETIF_F_TSO_ECN | NETIF_F_TSO6;
+ }
+
if (strchr(dev->name, '%')) {
err = dev_alloc_name(dev, dev->name);
if (err < 0)
@@ -537,6 +744,21 @@ static int tun_set_iff(struct file *file, struct ifreq *ifr)
else
tun->flags &= ~TUN_ONE_QUEUE;
+ if (ifr->ifr_flags & IFF_VIRTIO_HDR)
+ tun->flags |= TUN_VIRTIO_HDR;
+ else
+ tun->flags &= ~TUN_VIRTIO_HDR;
+
+ if (ifr->ifr_flags & IFF_RECV_CSUM)
+ tun->flags |= TUN_RECV_CSUM;
+ else
+ tun->flags &= ~TUN_RECV_CSUM;
+
+ if (ifr->ifr_flags & IFF_RECV_GSO)
+ tun->flags |= TUN_RECV_GSO;
+ else
+ tun->flags &= ~TUN_RECV_GSO;
+
file->private_data = tun;
tun->attached = 1;
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 72f1c5f..3dbef10 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -72,6 +72,9 @@ struct tun_struct {
#define TUN_NO_PI 0x0040
#define TUN_ONE_QUEUE 0x0080
#define TUN_PERSIST 0x0100
+#define TUN_VIRTIO_HDR 0x0200
+#define TUN_RECV_CSUM 0x0400
+#define TUN_RECV_GSO 0x0400
/* Ioctl defines */
#define TUNSETNOCSUM _IOW('T', 200, int)
@@ -87,6 +90,9 @@ struct tun_struct {
#define IFF_TAP 0x0002
#define IFF_NO_PI 0x1000
#define IFF_ONE_QUEUE 0x2000
+#define IFF_VIRTIO_HDR 0x4000
+#define IFF_RECV_CSUM 0x8000
+#define IFF_RECV_GSO 0x0800
struct tun_pi {
unsigned short flags;
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [2/6] [TUN]: Add GSO detection
2008-04-18 3:14 ` [1/6] [TUN]: Add GSO support Herbert Xu
@ 2008-04-18 3:15 ` Herbert Xu
2008-04-18 3:17 ` [3/6] [TUN]: Fix GSO mapping Herbert Xu
0 siblings, 1 reply; 19+ messages in thread
From: Herbert Xu @ 2008-04-18 3:15 UTC (permalink / raw)
To: virtualization
This is Rusty's second patch to tun to allow user-space access
to the GSO feature in a backwards compatible way.
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 34a03ec..4c15dc4 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -800,6 +800,15 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
return 0;
}
+ if (cmd == TUNGETFEATURES) {
+ /* Currently this just means: "what IFF flags are valid?".
+ * This is needed because we never checked for invalid flags on
+ * TUNSETIFF. This was introduced with IFF_GSO_HDR, so if a
+ * kernel doesn't have this ioctl, it doesn't have GSO header
+ * support. */
+ return put_user(IFF_ALL_FLAGS, (unsigned int __user*)argp);
+ }
+
if (!tun)
return -EBADFD;
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3dbef10..fe6855d 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -84,6 +84,7 @@ struct tun_struct {
#define TUNSETOWNER _IOW('T', 204, int)
#define TUNSETLINK _IOW('T', 205, int)
#define TUNSETGROUP _IOW('T', 206, int)
+#define TUNGETFEATURES _IOR('T', 207, unsigned int)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
@@ -93,6 +94,8 @@ struct tun_struct {
#define IFF_VIRTIO_HDR 0x4000
#define IFF_RECV_CSUM 0x8000
#define IFF_RECV_GSO 0x0800
+#define IFF_ALL_FLAGS (IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | \
+ IFF_VIRTIO_HDR | IFF_RECV_CSUM | IFF_RECV_GSO)
struct tun_pi {
unsigned short flags;
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [3/6] [TUN]: Fix GSO mapping
2008-04-18 3:15 ` [2/6] [TUN]: Add GSO detection Herbert Xu
@ 2008-04-18 3:17 ` Herbert Xu
2008-04-18 3:19 ` [4/6] [KVM] virtio-net: Add SG/GSO support Herbert Xu
2008-05-29 10:32 ` [3/6] [TUN]: Fix GSO mapping Mark McLoughlin
0 siblings, 2 replies; 19+ messages in thread
From: Herbert Xu @ 2008-04-18 3:17 UTC (permalink / raw)
To: virtualization
This patch avoids the correctness issue on the user-space mapping
by just copying the memory.
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 4c15dc4..d75cfd2 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -62,6 +62,7 @@
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/crc32.h>
+#include <linux/highmem.h>
#include <linux/virtio_net.h>
#include <net/net_namespace.h>
@@ -257,65 +258,55 @@ static struct sk_buff *copy_user_skb(size_t align, struct iovec *iv, size_t len)
}
/* This will fail if they give us a crazy iovec, but that's their own fault. */
-static int get_user_skb_frags(const struct iovec *iv, size_t count,
- struct skb_frag_struct *f)
+static int get_user_skb_frags(struct iovec *iv, size_t count,
+ struct skb_shared_info *sinfo)
{
- unsigned int i, j, num_pg = 0;
+ struct skb_frag_struct *f = sinfo->frags;
+ unsigned int i;
int err;
- struct page *pages[MAX_SKB_FRAGS];
- down_read(¤t->mm->mmap_sem);
+ f->page = NULL;
+
for (i = 0; i < count; i++) {
- int n, npages;
- unsigned long base, len;
- base = (unsigned long)iv[i].iov_base;
- len = (unsigned long)iv[i].iov_len;
+ unsigned int len = iv[i].iov_len;
- if (len == 0)
- continue;
+ while (len) {
+ void *virt;
+ unsigned int copy;
- /* How many pages will this take? */
- npages = 1 + (base + len - 1)/PAGE_SIZE - base/PAGE_SIZE;
- if (unlikely(num_pg + npages > MAX_SKB_FRAGS)) {
- err = -ENOSPC;
- goto fail;
- }
- n = get_user_pages(current, current->mm, base, npages,
- 0, 0, pages, NULL);
- if (unlikely(n < 0)) {
- err = n;
- goto fail;
- }
+ if (!f->page) {
+ f->page = alloc_page(GFP_KERNEL |
+ __GFP_HIGHMEM);
+ if (!f->page)
+ return -ENOMEM;
- /* Transfer pages to the frag array */
- for (j = 0; j < n; j++) {
- f[num_pg].page = pages[j];
- if (j == 0) {
- f[num_pg].page_offset = offset_in_page(base);
- f[num_pg].size = min(len, PAGE_SIZE -
- f[num_pg].page_offset);
- } else {
- f[num_pg].page_offset = 0;
- f[num_pg].size = min(len, PAGE_SIZE);
+ f->page_offset = 0;
+ f->size = 0;
+ sinfo->nr_frags++;
}
- len -= f[num_pg].size;
- base += f[num_pg].size;
- num_pg++;
- }
- if (unlikely(n != npages)) {
- err = -EFAULT;
- goto fail;
+ copy = PAGE_SIZE - f->size;
+ if (copy > len)
+ copy = len;
+
+ virt = kmap_atomic(f->page, KM_USER0);
+ err = memcpy_fromiovec(virt + f->size, iv, copy);
+ kunmap_atomic(virt, KM_USER0);
+
+ if (err)
+ return err;
+
+ f->size += copy;
+ if (f->size == PAGE_SIZE) {
+ if (sinfo->nr_frags >= MAX_SKB_FRAGS)
+ return -EMSGSIZE;
+ (++f)->page = NULL;
+ }
+ len -= copy;
}
}
- up_read(¤t->mm->mmap_sem);
- return num_pg;
-fail:
- for (i = 0; i < num_pg; i++)
- put_page(f[i].page);
- up_read(¤t->mm->mmap_sem);
- return err;
+ return 0;
}
@@ -360,13 +351,13 @@ static struct sk_buff *map_user_skb(const struct virtio_net_hdr *gso,
goto fail;
}
- err = get_user_skb_frags(iv, count, sinfo->frags);
+ err = get_user_skb_frags(iv, count, sinfo);
if (err < 0)
goto fail;
- sinfo->nr_frags = err;
skb->len += len;
skb->data_len += len;
+ skb->truesize += len;
return skb;
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [4/6] [KVM] virtio-net: Add SG/GSO support
2008-04-18 3:17 ` [3/6] [TUN]: Fix GSO mapping Herbert Xu
@ 2008-04-18 3:19 ` Herbert Xu
2008-04-18 3:21 ` [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO Herbert Xu
2008-05-29 10:32 ` [3/6] [TUN]: Fix GSO mapping Mark McLoughlin
1 sibling, 1 reply; 19+ messages in thread
From: Herbert Xu @ 2008-04-18 3:19 UTC (permalink / raw)
To: virtualization
Here's the patch to get the KVM backend to do GSO. Please note
that this was a quick hack and I haven't even tested the case
where the tun device doesn't support GSO so it'll probably break
there.
It does the stupidest thing possible for guest => host by copying
the data so that it can use write(2).
diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 86f9e5a..b92e7c5 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -22,9 +22,9 @@
#define VIRTIO_ID_NET 1
/* The feature bitmap for virtio net */
-#define VIRTIO_NET_F_NO_CSUM 0
+#define VIRTIO_NET_F_CSUM 0
#define VIRTIO_NET_F_MAC 5
-#define VIRTIO_NET_F_GS0 6
+#define VIRTIO_NET_F_GSO 6
#define TX_TIMER_INTERVAL (1000 / 500)
@@ -87,7 +87,10 @@ static void virtio_net_update_config(VirtIODevice *vdev, uint8_t *config)
static uint32_t virtio_net_get_features(VirtIODevice *vdev)
{
- return (1 << VIRTIO_NET_F_MAC);
+ int gso = tap_has_gso(to_virtio_net(vdev)->vc->vlan->first_client);
+
+ return (1 << VIRTIO_NET_F_MAC) |
+ (gso ? (1 << VIRTIO_NET_F_CSUM) | (1 << VIRTIO_NET_F_GSO) : 0);
}
/* RX */
@@ -112,6 +115,7 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
VirtQueueElement elem;
struct virtio_net_hdr *hdr;
int offset, i;
+ int total;
/* FIXME: the drivers really need to set their status better */
if (n->rx_vq->vring.avail == NULL) {
@@ -129,18 +133,26 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
hdr->flags = 0;
hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
- /* copy in packet. ugh */
offset = 0;
+ total = sizeof(*hdr);
+
+ if (tap_has_gso(n->vc->vlan->first_client)) {
+ memcpy(hdr, buf, sizeof(*hdr));
+ offset += total;
+ }
+
+ /* copy in packet. ugh */
i = 1;
while (offset < size && i < elem.in_num) {
int len = MIN(elem.in_sg[i].iov_len, size - offset);
memcpy(elem.in_sg[i].iov_base, buf + offset, len);
offset += len;
+ total += len;
i++;
}
/* signal other side */
- virtqueue_push(n->rx_vq, &elem, sizeof(*hdr) + offset);
+ virtqueue_push(n->rx_vq, &elem, total);
virtio_notify(&n->vdev, n->rx_vq);
}
@@ -201,7 +213,10 @@ void virtio_net_poll(void)
hdr->flags = 0;
hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
again:
- len = readv(vnet->tap_fd, &elem.in_sg[1], elem.in_num - 1);
+ if (tap_has_gso(vnet->vc->vlan->first_client))
+ len = readv(vnet->tap_fd, &elem.in_sg[0], elem.in_num);
+ else
+ len = readv(vnet->tap_fd, &elem.in_sg[1], elem.in_num - 1);
if (len == -1) {
if (errno == EINTR || errno == EAGAIN)
goto again;
@@ -229,25 +244,32 @@ again:
static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
{
VirtQueueElement elem;
+ uint8_t buf[sizeof(struct virtio_net_hdr) + 65536];
+ int size = sizeof(buf);
int count = 0;
+ int nogso = !tap_has_gso(n->vc->vlan->first_client);
if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return;
while (virtqueue_pop(vq, &elem)) {
int i;
- size_t len = 0;
+ int offset = 0;
- /* ignore the header for now */
- for (i = 1; i < elem.out_num; i++) {
- qemu_send_packet(n->vc, elem.out_sg[i].iov_base,
- elem.out_sg[i].iov_len);
- len += elem.out_sg[i].iov_len;
+ /* Ignore the header if GSO is not supported. */
+ for (i = nogso; i < elem.out_num; i++) {
+ int len = MIN(elem.out_sg[i].iov_len, size - offset);
+ memcpy(buf + offset, elem.out_sg[i].iov_base, len);
+ offset += len;
}
+ qemu_send_packet(n->vc, buf, offset);
+
count++;
- virtqueue_push(vq, &elem, sizeof(struct virtio_net_hdr) + len);
+ if (nogso)
+ offset += elem.out_sg[0].iov_len;
+ virtqueue_push(vq, &elem, offset);
virtio_notify(&n->vdev, vq);
}
}
@@ -291,7 +313,7 @@ void *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
n->vdev.update_config = virtio_net_update_config;
n->vdev.get_features = virtio_net_get_features;
n->rx_vq = virtio_add_queue(&n->vdev, 512, virtio_net_handle_rx);
- n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx);
+ n->tx_vq = virtio_add_queue(&n->vdev, 512, virtio_net_handle_tx);
n->can_receive = 0;
memcpy(n->mac, nd->macaddr, 6);
n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
diff --git a/qemu/net.h b/qemu/net.h
index c8ff6d6..c3a75d6 100644
--- a/qemu/net.h
+++ b/qemu/net.h
@@ -36,6 +36,7 @@ void do_info_network(void);
/* virtio hack for zero copy receive */
int hack_around_tap(void *opaque);
+int tap_has_gso(void *opaque);
/* NIC info */
diff --git a/qemu/vl.c b/qemu/vl.c
index 21c9b53..3f10d0a 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -3962,6 +3962,7 @@ typedef struct TAPState {
int fd;
char down_script[1024];
int no_poll;
+ int gso;
} TAPState;
static int tap_read_poll(void *opaque)
@@ -4019,6 +4020,14 @@ int hack_around_tap(void *opaque)
return -1;
}
+int tap_has_gso(void *opaque)
+{
+ VLANClientState *vc = opaque;
+ TAPState *ts = vc->opaque;
+
+ return ts ? ts->gso : 0;
+}
+
/* fd support */
static TAPState *net_tap_fd_init(VLANState *vlan, int fd)
@@ -4038,7 +4047,7 @@ static TAPState *net_tap_fd_init(VLANState *vlan, int fd)
}
#if defined (_BSD) || defined (__FreeBSD_kernel__)
-static int tap_open(char *ifname, int ifname_size)
+static int tap_open(char *ifname, int ifname_size, int *gso)
{
int fd;
char *dev;
@@ -4180,7 +4189,7 @@ int tap_alloc(char *dev)
return tap_fd;
}
-static int tap_open(char *ifname, int ifname_size)
+static int tap_open(char *ifname, int ifname_size, int *gso)
{
char dev[10]="";
int fd;
@@ -4193,18 +4202,30 @@ static int tap_open(char *ifname, int ifname_size)
return fd;
}
#else
-static int tap_open(char *ifname, int ifname_size)
+static int tap_open(char *ifname, int ifname_size, int *gso)
{
struct ifreq ifr;
int fd, ret;
+ unsigned int features;
TFR(fd = open("/dev/net/tun", O_RDWR));
if (fd < 0) {
fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n");
return -1;
}
+
+ if (ioctl(fd, TUNGETFEATURES, &features))
+ features = IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE;
+
memset(&ifr, 0, sizeof(ifr));
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+ if (features & IFF_VIRTIO_HDR && features & IFF_RECV_CSUM &&
+ features & IFF_RECV_GSO) {
+ *gso = 1;
+ ifr.ifr_flags |= IFF_VIRTIO_HDR | IFF_RECV_CSUM | IFF_RECV_GSO;
+ }
+
if (ifname[0] != '\0')
pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
else
@@ -4262,13 +4283,15 @@ static int net_tap_init(VLANState *vlan, const char *ifname1,
{
TAPState *s;
int fd;
+ int gso;
char ifname[128];
if (ifname1 != NULL)
pstrcpy(ifname, sizeof(ifname), ifname1);
else
ifname[0] = '\0';
- TFR(fd = tap_open(ifname, sizeof(ifname)));
+ gso = 0;
+ TFR(fd = tap_open(ifname, sizeof(ifname), &gso));
if (fd < 0)
return -1;
@@ -4281,6 +4304,8 @@ static int net_tap_init(VLANState *vlan, const char *ifname1,
s = net_tap_fd_init(vlan, fd);
if (!s)
return -1;
+
+ s->gso = gso;
snprintf(s->vc->info_str, sizeof(s->vc->info_str),
"tap: ifname=%s setup_script=%s", ifname, setup_script);
if (down_script && strcmp(down_script, "no"))
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO
2008-04-18 3:19 ` [4/6] [KVM] virtio-net: Add SG/GSO support Herbert Xu
@ 2008-04-18 3:21 ` Herbert Xu
2008-04-18 3:24 ` [6/6] [VIRTIO] net: Allow receiving SG packets Herbert Xu
2008-04-21 19:01 ` [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO Rusty Russell
0 siblings, 2 replies; 19+ messages in thread
From: Herbert Xu @ 2008-04-18 3:21 UTC (permalink / raw)
To: virtualization
This patch adds some basic ethtool operations to virtio_net so
I could test SG without GSO (which was really useful because TSO
turned out to be buggy :)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b58472c..0b508bb 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -19,6 +19,7 @@
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
@@ -333,6 +334,33 @@ static int virtnet_close(struct net_device *dev)
return 0;
}
+static int virtnet_set_tx_csum(struct net_device *dev, u32 data)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct virtio_device *vdev = vi->vdev;
+
+ if (data && !vdev->config->feature(vdev, VIRTIO_NET_F_CSUM))
+ return -ENOSYS;
+
+ return ethtool_op_set_tx_hw_csum(dev, data);
+}
+
+static struct ethtool_ops virtnet_ethtool_ops =
+{
+ .set_tx_csum = virtnet_set_tx_csum,
+ .set_sg = ethtool_op_set_sg,
+};
+
+static int virtnet_change_mtu(struct net_device *dev, int mtu)
+{
+ int max = 65535 - ETH_HLEN;
+
+ if (mtu > max)
+ return -EINVAL;
+ dev->mtu = mtu;
+ return 0;
+}
+
static int virtnet_probe(struct virtio_device *vdev)
{
int err;
@@ -348,10 +376,12 @@ static int virtnet_probe(struct virtio_device *vdev)
dev->open = virtnet_open;
dev->stop = virtnet_close;
dev->hard_start_xmit = start_xmit;
+ dev->change_mtu = virtnet_change_mtu;
dev->features = NETIF_F_HIGHDMA;
#ifdef CONFIG_NET_POLL_CONTROLLER
dev->poll_controller = virtnet_netpoll;
#endif
+ SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
SET_NETDEV_DEV(dev, &vdev->dev);
/* Do we support "hardware" checksums? */
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply related [flat|nested] 19+ messages in thread
* [6/6] [VIRTIO] net: Allow receiving SG packets
2008-04-18 3:21 ` [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO Herbert Xu
@ 2008-04-18 3:24 ` Herbert Xu
2008-04-18 14:08 ` Rusty Russell
2008-04-21 19:06 ` Rusty Russell
2008-04-21 19:01 ` [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO Rusty Russell
1 sibling, 2 replies; 19+ messages in thread
From: Herbert Xu @ 2008-04-18 3:24 UTC (permalink / raw)
To: virtualization
Finally this patch lets virtio_net receive GSO packets in addition
to sending them. This can definitely be optimised for the non-GSO
case. For comparison the Xen approach stores one page in each skb
and uses subsequent skb's pages to construct an SG skb instead of
preallocating the maximum amount of pages per skb.
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b58472c..bc49057 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -73,6 +73,7 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
unsigned len)
{
struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
+ int err;
if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
pr_debug("%s: short packet %i\n", dev->name, len);
@@ -80,9 +81,15 @@ static void receive_skb(struct net_device *dev, struct sk_buff *skb,
goto drop;
}
len -= sizeof(struct virtio_net_hdr);
- BUG_ON(len > MAX_PACKET_LEN);
- skb_trim(skb, len);
+ err = pskb_trim(skb, len);
+ if (err) {
+ pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err);
+ dev->stats.rx_dropped++;
+ goto drop;
+ }
+ skb->truesize += skb->data_len;
+
skb->protocol = eth_type_trans(skb, dev);
pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
ntohs(skb->protocol), skb->len, skb->pkt_type);
@@ -142,10 +149,11 @@ drop:
static void try_fill_recv(struct virtnet_info *vi)
{
struct sk_buff *skb;
- struct scatterlist sg[1+MAX_SKB_FRAGS];
+ struct scatterlist sg[2+MAX_SKB_FRAGS];
int num, err;
+ int i;
- sg_init_table(sg, 1+MAX_SKB_FRAGS);
+ sg_init_table(sg, 2+MAX_SKB_FRAGS);
for (;;) {
skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
if (unlikely(!skb))
@@ -153,6 +161,21 @@ static void try_fill_recv(struct virtnet_info *vi)
skb_put(skb, MAX_PACKET_LEN);
vnet_hdr_to_sg(sg, skb);
+
+ for (i = 0; i < MAX_SKB_FRAGS; i++) {
+ skb_shinfo(skb)->frags[i].page = alloc_page(GFP_ATOMIC);
+ if (!skb_shinfo(skb)->frags[i].page)
+ break;
+
+ skb_shinfo(skb)->frags[i].page_offset = 0;
+ skb_shinfo(skb)->frags[i].size = PAGE_SIZE;
+
+ skb->data_len += PAGE_SIZE;
+ skb->len += PAGE_SIZE;
+
+ skb_shinfo(skb)->nr_frags++;
+ }
+
num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
skb_queue_head(&vi->recv, skb);
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [6/6] [VIRTIO] net: Allow receiving SG packets
2008-04-18 3:24 ` [6/6] [VIRTIO] net: Allow receiving SG packets Herbert Xu
@ 2008-04-18 14:08 ` Rusty Russell
2008-04-18 14:30 ` Herbert Xu
2008-04-21 19:06 ` Rusty Russell
1 sibling, 1 reply; 19+ messages in thread
From: Rusty Russell @ 2008-04-18 14:08 UTC (permalink / raw)
To: virtualization; +Cc: virtualization, Herbert Xu
On Friday 18 April 2008 13:24:27 Herbert Xu wrote:
> Finally this patch lets virtio_net receive GSO packets in addition
> to sending them. This can definitely be optimised for the non-GSO
> case. For comparison the Xen approach stores one page in each skb
> and uses subsequent skb's pages to construct an SG skb instead of
> preallocating the maximum amount of pages per skb.
Well, this ends up freeing the unused pages immediately which is actually
quite nice. We can cache them ourselves if we want, but we'd have to see if
the page allocation/free overhead is measurable...
Thanks!
Rusty.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [6/6] [VIRTIO] net: Allow receiving SG packets
2008-04-18 14:08 ` Rusty Russell
@ 2008-04-18 14:30 ` Herbert Xu
0 siblings, 0 replies; 19+ messages in thread
From: Herbert Xu @ 2008-04-18 14:30 UTC (permalink / raw)
To: Rusty Russell; +Cc: virtualization, virtualization
On Sat, Apr 19, 2008 at 12:08:04AM +1000, Rusty Russell wrote:
> On Friday 18 April 2008 13:24:27 Herbert Xu wrote:
> > Finally this patch lets virtio_net receive GSO packets in addition
> > to sending them. This can definitely be optimised for the non-GSO
> > case. For comparison the Xen approach stores one page in each skb
> > and uses subsequent skb's pages to construct an SG skb instead of
> > preallocating the maximum amount of pages per skb.
>
> Well, this ends up freeing the unused pages immediately which is actually
> quite nice. We can cache them ourselves if we want, but we'd have to see if
> the page allocation/free overhead is measurable...
I was thinking of the case of a large number of these interfaces
but yeah for one interface it's no biggie :)
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO
2008-04-18 3:21 ` [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO Herbert Xu
2008-04-18 3:24 ` [6/6] [VIRTIO] net: Allow receiving SG packets Herbert Xu
@ 2008-04-21 19:01 ` Rusty Russell
2008-04-22 1:15 ` Herbert Xu
1 sibling, 1 reply; 19+ messages in thread
From: Rusty Russell @ 2008-04-21 19:01 UTC (permalink / raw)
To: virtualization; +Cc: virtualization, Herbert Xu
On Friday 18 April 2008 13:21:42 Herbert Xu wrote:
> +static int virtnet_change_mtu(struct net_device *dev, int mtu)
> +{
> + int max = 65535 - ETH_HLEN;
> +
> + if (mtu > max)
> + return -EINVAL;
> + dev->mtu = mtu;
> + return 0;
> +}
Hi Herbert!
I removed this part; useful for testing, but we need a feature bit for MTU
size in general. And to change it on the fly either requires a reset &
re-init, or some protocol (and feature bit!) for renegotiating MTU.
Cheers,
Rusty.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [6/6] [VIRTIO] net: Allow receiving SG packets
2008-04-18 3:24 ` [6/6] [VIRTIO] net: Allow receiving SG packets Herbert Xu
2008-04-18 14:08 ` Rusty Russell
@ 2008-04-21 19:06 ` Rusty Russell
2008-04-21 20:04 ` David Miller
1 sibling, 1 reply; 19+ messages in thread
From: Rusty Russell @ 2008-04-21 19:06 UTC (permalink / raw)
To: virtualization; +Cc: Herbert Xu, virtualization, netdev
On Friday 18 April 2008 13:24:27 Herbert Xu wrote:
> Finally this patch lets virtio_net receive GSO packets in addition
> to sending them.
...
> static void try_fill_recv(struct virtnet_info *vi)
> {
> struct sk_buff *skb;
> - struct scatterlist sg[1+MAX_SKB_FRAGS];
> + struct scatterlist sg[2+MAX_SKB_FRAGS];
> int num, err;
I'm not sure what the right number is here. Say worst case is header which
goes over a page boundary then MAX_SKB_FRAGS in the skb, but for some reason
that already has a +2:
/* To allow 64K frame to be packed as single skb without frag_list */
#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
Unless someone explains, I'll change the xmit sg to 2+MAX_SKB_FRAGS as well.
Thanks,
Rusty.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [6/6] [VIRTIO] net: Allow receiving SG packets
2008-04-21 19:06 ` Rusty Russell
@ 2008-04-21 20:04 ` David Miller
2008-04-22 1:13 ` Herbert Xu
2008-04-22 2:50 ` Rusty Russell
0 siblings, 2 replies; 19+ messages in thread
From: David Miller @ 2008-04-21 20:04 UTC (permalink / raw)
To: rusty; +Cc: virtualization, herbert, virtualization, netdev
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 22 Apr 2008 05:06:16 +1000
> I'm not sure what the right number is here. Say worst case is header which
> goes over a page boundary then MAX_SKB_FRAGS in the skb, but for some reason
> that already has a +2:
>
> /* To allow 64K frame to be packed as single skb without frag_list */
> #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
>
> Unless someone explains, I'll change the xmit sg to 2+MAX_SKB_FRAGS as well.
MAX_SKB_FRAGS + 1 is what you ought to need.
MAX_SKB_FRAGS is only accounting for the skb frag pages.
If you want to know how many segments skb->data might
consume as well, you have to add one.
skb->data is linear, therefore it's not possible to need
more than one scatterlist entry for it.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [6/6] [VIRTIO] net: Allow receiving SG packets
2008-04-21 20:04 ` David Miller
@ 2008-04-22 1:13 ` Herbert Xu
2008-04-22 2:50 ` Rusty Russell
1 sibling, 0 replies; 19+ messages in thread
From: Herbert Xu @ 2008-04-22 1:13 UTC (permalink / raw)
To: David Miller; +Cc: rusty, virtualization, virtualization, netdev
On Mon, Apr 21, 2008 at 01:04:18PM -0700, David Miller wrote:
> From: Rusty Russell <rusty@rustcorp.com.au>
> Date: Tue, 22 Apr 2008 05:06:16 +1000
>
> > I'm not sure what the right number is here. Say worst case is header which
> > goes over a page boundary then MAX_SKB_FRAGS in the skb, but for some reason
> > that already has a +2:
The +2 (i.e., extra +1) is for the virtio GSO header.
> skb->data is linear, therefore it's not possible to need
> more than one scatterlist entry for it.
Theoretically yes :) But for virtualisation the underlying transport
may present meta-physically contiguous memory that is physically
discrete. So we may actually need to have multiple SG entries for
skb->data. However, no current code path should generate packets
with both long skb->data areas *and* skb page frags so we could
just drop them if they show up.
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO
2008-04-21 19:01 ` [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO Rusty Russell
@ 2008-04-22 1:15 ` Herbert Xu
2008-04-22 2:44 ` Rusty Russell
0 siblings, 1 reply; 19+ messages in thread
From: Herbert Xu @ 2008-04-22 1:15 UTC (permalink / raw)
To: Rusty Russell; +Cc: virtualization, virtualization
On Tue, Apr 22, 2008 at 05:01:46AM +1000, Rusty Russell wrote:
> On Friday 18 April 2008 13:21:42 Herbert Xu wrote:
> > +static int virtnet_change_mtu(struct net_device *dev, int mtu)
> > +{
> > + int max = 65535 - ETH_HLEN;
> > +
> > + if (mtu > max)
> > + return -EINVAL;
> > + dev->mtu = mtu;
> > + return 0;
> > +}
>
> Hi Herbert!
>
> I removed this part; useful for testing, but we need a feature bit for MTU
> size in general. And to change it on the fly either requires a reset &
> re-init, or some protocol (and feature bit!) for renegotiating MTU.
BTW Rusty this was just a work-in-progress. When I submit them I
will add sign-offs.
However, the MTU part should be fine as long as the other end supports
SG. The operative word in MTU is T :)
Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO
2008-04-22 1:15 ` Herbert Xu
@ 2008-04-22 2:44 ` Rusty Russell
0 siblings, 0 replies; 19+ messages in thread
From: Rusty Russell @ 2008-04-22 2:44 UTC (permalink / raw)
To: Herbert Xu; +Cc: virtualization
On Tuesday 22 April 2008 11:15:02 Herbert Xu wrote:
> On Tue, Apr 22, 2008 at 05:01:46AM +1000, Rusty Russell wrote:
> > On Friday 18 April 2008 13:21:42 Herbert Xu wrote:
> > > +static int virtnet_change_mtu(struct net_device *dev, int mtu)
> > > +{
> > > + int max = 65535 - ETH_HLEN;
> > > +
> > > + if (mtu > max)
> > > + return -EINVAL;
> > > + dev->mtu = mtu;
> > > + return 0;
> > > +}
> >
> > Hi Herbert!
> >
> > I removed this part; useful for testing, but we need a feature bit
> > for MTU size in general. And to change it on the fly either requires a
> > reset & re-init, or some protocol (and feature bit!) for renegotiating
> > MTU.
>
> BTW Rusty this was just a work-in-progress. When I submit them I
> will add sign-offs.
OK. Meanwhile I stole it for my own testing :)
> However, the MTU part should be fine as long as the other end supports
> SG. The operative word in MTU is T :)
I hadn't really thought about it; you're right, MTU is merely a formality. If
the other end has big enough receive buffers, we might as well use them.
I still feel oddly nervous about surprising the other end tho...
Thanks,
Rusty.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [6/6] [VIRTIO] net: Allow receiving SG packets
2008-04-21 20:04 ` David Miller
2008-04-22 1:13 ` Herbert Xu
@ 2008-04-22 2:50 ` Rusty Russell
2008-04-22 2:55 ` David Miller
1 sibling, 1 reply; 19+ messages in thread
From: Rusty Russell @ 2008-04-22 2:50 UTC (permalink / raw)
To: David Miller; +Cc: virtualization, herbert, virtualization, netdev
On Tuesday 22 April 2008 06:04:18 David Miller wrote:
> From: Rusty Russell <rusty@rustcorp.com.au>
> Date: Tue, 22 Apr 2008 05:06:16 +1000
>
> > I'm not sure what the right number is here. Say worst case is header
> > which goes over a page boundary then MAX_SKB_FRAGS in the skb, but for
> > some reason that already has a +2:
> >
> > /* To allow 64K frame to be packed as single skb without frag_list */
> > #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
> >
> > Unless someone explains, I'll change the xmit sg to 2+MAX_SKB_FRAGS as
> > well.
>
> MAX_SKB_FRAGS + 1 is what you ought to need.
Right, and so that's +2 for virtio_net because we have an extra header as
Herbert points out.
But I was curious as to why the +2 in the MAX_SKB_FRAGS definition?
Thanks,
Rusty.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [6/6] [VIRTIO] net: Allow receiving SG packets
2008-04-22 2:50 ` Rusty Russell
@ 2008-04-22 2:55 ` David Miller
0 siblings, 0 replies; 19+ messages in thread
From: David Miller @ 2008-04-22 2:55 UTC (permalink / raw)
To: rusty; +Cc: virtualization, herbert, virtualization, netdev
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 22 Apr 2008 12:50:27 +1000
> But I was curious as to why the +2 in the MAX_SKB_FRAGS definition?
To be honest I have no idea.
When Alexey added the TSO changeset way back then, it had the
"+2", from the history-2.6 tree:
commit 80223d5186f73bf42a7e260c66c9cb9f7d8ec9cf
Author: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Date: Wed Aug 28 11:52:03 2002 -0700
[NET]: Add TCP segmentation offload core infrastructure.
...
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a812681..9b6e6ad 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -109,7 +109,8 @@ struct sk_buff_head {
struct sk_buff;
-#define MAX_SKB_FRAGS 6
+/* To allow 64K frame to be packed as single skb without frag_list */
+#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
typedef struct skb_frag_struct skb_frag_t;
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [3/6] [TUN]: Fix GSO mapping
2008-04-18 3:17 ` [3/6] [TUN]: Fix GSO mapping Herbert Xu
2008-04-18 3:19 ` [4/6] [KVM] virtio-net: Add SG/GSO support Herbert Xu
@ 2008-05-29 10:32 ` Mark McLoughlin
2008-05-29 10:38 ` Herbert Xu
1 sibling, 1 reply; 19+ messages in thread
From: Mark McLoughlin @ 2008-05-29 10:32 UTC (permalink / raw)
To: Herbert Xu; +Cc: virtualization
On Fri, 2008-04-18 at 11:17 +0800, Herbert Xu wrote:
> This patch avoids the correctness issue on the user-space mapping
> by just copying the memory.
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 4c15dc4..d75cfd2 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> + virt = kmap_atomic(f->page, KM_USER0);
> + err = memcpy_fromiovec(virt + f->size, iv, copy);
> + kunmap_atomic(virt, KM_USER0);
Seeing an oops from this; fix below.
Cheers,
Mark.
Subject: [PATCH 1/1] tun: Do not use kmap_atomic() since memcpy_fromiovec() can sleep
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
---
drivers/net/tun.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 151b409..aff338e 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -322,9 +322,9 @@ static int get_user_skb_frags(struct iovec *iv, size_t count,
if (copy > len)
copy = len;
- virt = kmap_atomic(f->page, KM_USER0);
+ virt = kmap(f->page);
err = memcpy_fromiovec(virt + f->size, iv, copy);
- kunmap_atomic(virt, KM_USER0);
+ kunmap(f->page);
if (err)
return err;
^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [3/6] [TUN]: Fix GSO mapping
2008-05-29 10:32 ` [3/6] [TUN]: Fix GSO mapping Mark McLoughlin
@ 2008-05-29 10:38 ` Herbert Xu
0 siblings, 0 replies; 19+ messages in thread
From: Herbert Xu @ 2008-05-29 10:38 UTC (permalink / raw)
To: Mark McLoughlin; +Cc: virtualization
On Thu, May 29, 2008 at 11:32:51AM +0100, Mark McLoughlin wrote:
>
> Subject: [PATCH 1/1] tun: Do not use kmap_atomic() since memcpy_fromiovec() can sleep
>
> Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Good catch!
Thanks,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2008-05-29 10:38 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-18 3:12 [0/6] [NET]: virtio SG/TSO patches Herbert Xu
2008-04-18 3:14 ` [1/6] [TUN]: Add GSO support Herbert Xu
2008-04-18 3:15 ` [2/6] [TUN]: Add GSO detection Herbert Xu
2008-04-18 3:17 ` [3/6] [TUN]: Fix GSO mapping Herbert Xu
2008-04-18 3:19 ` [4/6] [KVM] virtio-net: Add SG/GSO support Herbert Xu
2008-04-18 3:21 ` [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO Herbert Xu
2008-04-18 3:24 ` [6/6] [VIRTIO] net: Allow receiving SG packets Herbert Xu
2008-04-18 14:08 ` Rusty Russell
2008-04-18 14:30 ` Herbert Xu
2008-04-21 19:06 ` Rusty Russell
2008-04-21 20:04 ` David Miller
2008-04-22 1:13 ` Herbert Xu
2008-04-22 2:50 ` Rusty Russell
2008-04-22 2:55 ` David Miller
2008-04-21 19:01 ` [5/6] [VIRTIO] net: Add ethtool ops for SG/GSO Rusty Russell
2008-04-22 1:15 ` Herbert Xu
2008-04-22 2:44 ` Rusty Russell
2008-05-29 10:32 ` [3/6] [TUN]: Fix GSO mapping Mark McLoughlin
2008-05-29 10:38 ` Herbert Xu
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).