* [PATCH] TUN/TAP GSO/partial csum support
@ 2008-01-16 12:06 Rusty Russell
2008-01-16 12:07 ` [PATCH] Interface to query tun/tap features Rusty Russell
0 siblings, 1 reply; 2+ messages in thread
From: Rusty Russell @ 2008-01-16 12:06 UTC (permalink / raw)
To: netdev; +Cc: Herbert Xu, Max Krasnyansky
[-- Attachment #1: Type: text/plain, Size: 9879 bytes --]
OK, revised with help from Herbert. Also, I have attached a test program and
a script to run it (it short-circuits two tun devices, so you can run it with
the patch applied and see big packets flowing).
This implements partial checksum and GSO support for tun/tap.
We use the virtio_net_hdr: it is an ABI already and designed to
encapsulate such metadata as GSO and partial checksums.
lguest performance (160MB sendfile, worst/best/avg, 20 runs):
Before: 5.06/3.39/3.82
After: 4.69/0.84/2.84
Note that there is no easy way to detect if GSO is supported: see next
patch.
Questions:
1) Should we rename/move virtio_net_hdr to something more generic?
2) Is this the right way to build a paged skb from user pages?
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
drivers/net/tun.c | 250 +++++++++++++++++++++++++++++++++++++++++++------
include/linux/if_tun.h | 2
2 files changed, 225 insertions(+), 27 deletions(-)
diff -r ba3c0eb8741a drivers/net/tun.c
--- a/drivers/net/tun.c Wed Jan 16 17:35:25 2008 +1100
+++ b/drivers/net/tun.c Wed Jan 16 22:11:11 2008 +1100
@@ -62,6 +62,7 @@
#include <linux/if_ether.h>
#include <linux/if_tun.h>
#include <linux/crc32.h>
+#include <linux/virtio_net.h>
#include <net/net_namespace.h>
#include <asm/system.h>
@@ -238,35 +239,189 @@ static unsigned int tun_chr_poll(struct
return mask;
}
+static struct sk_buff *copy_user_skb(size_t align, struct iovec *iv, size_t len)
+{
+ struct sk_buff *skb;
+
+ if (!(skb = alloc_skb(len + align, GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+ if (align)
+ skb_reserve(skb, align);
+
+ if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
+ kfree_skb(skb);
+ return ERR_PTR(-EFAULT);
+ }
+ return skb;
+}
+
+/* This will fail if they give us a crazy iovec, but that's their own fault. */
+static int get_user_skb_frags(const struct iovec *iv, size_t count,
+ struct skb_frag_struct *f)
+{
+ unsigned int i, j, num_pg = 0;
+ int err;
+ struct page *pages[MAX_SKB_FRAGS];
+
+ down_read(&current->mm->mmap_sem);
+ for (i = 0; i < count; i++) {
+ int n, npages;
+ unsigned long base, len;
+ base = (unsigned long)iv[i].iov_base;
+ len = (unsigned long)iv[i].iov_len;
+
+ if (len == 0)
+ continue;
+
+ /* How many pages will this take? */
+ npages = 1 + (base + len - 1)/PAGE_SIZE - base/PAGE_SIZE;
+ if (unlikely(num_pg + npages > MAX_SKB_FRAGS)) {
+ err = -ENOSPC;
+ goto fail;
+ }
+ n = get_user_pages(current, current->mm, base, npages,
+ 0, 0, pages, NULL);
+ if (unlikely(n < 0)) {
+ err = n;
+ goto fail;
+ }
+
+ /* Transfer pages to the frag array */
+ for (j = 0; j < n; j++) {
+ f[num_pg].page = pages[j];
+ if (j == 0) {
+ f[num_pg].page_offset = offset_in_page(base);
+ f[num_pg].size = min(len, PAGE_SIZE -
+ f[num_pg].page_offset);
+ } else {
+ f[num_pg].page_offset = 0;
+ f[num_pg].size = min(len, PAGE_SIZE);
+ }
+ len -= f[num_pg].size;
+ base += f[num_pg].size;
+ num_pg++;
+ }
+
+ if (unlikely(n != npages)) {
+ err = -EFAULT;
+ goto fail;
+ }
+ }
+ up_read(&current->mm->mmap_sem);
+ return num_pg;
+
+fail:
+ for (i = 0; i < num_pg; i++)
+ put_page(f[i].page);
+ up_read(&current->mm->mmap_sem);
+ return err;
+}
+
+
+static struct sk_buff *map_user_skb(const struct virtio_net_hdr *gso,
+ size_t align, struct iovec *iv,
+ size_t count, size_t len)
+{
+ struct sk_buff *skb;
+ struct skb_shared_info *sinfo;
+ int err;
+
+ if (!(skb = alloc_skb(gso->gso_hdr_len + align, GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+ if (align)
+ skb_reserve(skb, align);
+
+ sinfo = skb_shinfo(skb);
+ sinfo->gso_size = gso->gso_size;
+ sinfo->gso_type = SKB_GSO_DODGY;
+ switch (gso->gso_type) {
+ case VIRTIO_NET_HDR_GSO_TCPV4_ECN:
+ sinfo->gso_type |= SKB_GSO_TCP_ECN;
+ /* fall through */
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ sinfo->gso_type |= SKB_GSO_TCPV4;
+ break;
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ sinfo->gso_type |= SKB_GSO_TCPV6;
+ break;
+ case VIRTIO_NET_HDR_GSO_UDP:
+ sinfo->gso_type |= SKB_GSO_UDP;
+ break;
+ default:
+ err = -EINVAL;
+ goto fail;
+ }
+
+ /* Copy in the header. */
+ if (memcpy_fromiovec(skb_put(skb, gso->gso_hdr_len), iv,
+ gso->gso_hdr_len)) {
+ err = -EFAULT;
+ goto fail;
+ }
+
+ err = get_user_skb_frags(iv, count, sinfo->frags);
+ if (err < 0)
+ goto fail;
+
+ sinfo->nr_frags = err;
+ skb->len += len;
+ skb->data_len += len;
+
+ return skb;
+
+fail:
+ kfree_skb(skb);
+ return ERR_PTR(err);
+}
+
+static inline size_t iov_total(const struct iovec *iv, unsigned long count)
+{
+ unsigned long i;
+ size_t len;
+
+ for (i = 0, len = 0; i < count; i++)
+ len += iv[i].iov_len;
+
+ return len;
+}
+
/* Get packet from user space buffer */
-static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count)
+static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t num)
{
struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) };
+ struct virtio_net_hdr gso = { 0, VIRTIO_NET_HDR_GSO_NONE };
struct sk_buff *skb;
- size_t len = count, align = 0;
+ size_t tot_len = iov_total(iv, num);
+ size_t len = tot_len, align = 0;
if (!(tun->flags & TUN_NO_PI)) {
- if ((len -= sizeof(pi)) > count)
+ if ((len -= sizeof(pi)) > tot_len)
return -EINVAL;
if(memcpy_fromiovec((void *)&pi, iv, sizeof(pi)))
+ return -EFAULT;
+ }
+ if (tun->flags & TUN_GSO_HDR) {
+ if ((len -= sizeof(gso)) > tot_len)
+ return -EINVAL;
+
+ if (memcpy_fromiovec((void *)&gso, iv, sizeof(gso)))
return -EFAULT;
}
if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV)
align = NET_IP_ALIGN;
- if (!(skb = alloc_skb(len + align, GFP_KERNEL))) {
+ if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE)
+ skb = map_user_skb(&gso, align, iv, num, len);
+ else
+ skb = copy_user_skb(align, iv, len);
+
+ if (IS_ERR(skb)) {
tun->dev->stats.rx_dropped++;
- return -ENOMEM;
- }
-
- if (align)
- skb_reserve(skb, align);
- if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
- tun->dev->stats.rx_dropped++;
- kfree_skb(skb);
- return -EFAULT;
+ return PTR_ERR(skb);
}
switch (tun->flags & TUN_TYPE_MASK) {
@@ -280,7 +435,13 @@ static __inline__ ssize_t tun_get_user(s
break;
};
- if (tun->flags & TUN_NOCHECKSUM)
+ if (gso.flags & (1 << VIRTIO_NET_F_NO_CSUM)) {
+ if (!skb_partial_csum_set(skb,gso.csum_start,gso.csum_offset)) {
+ tun->dev->stats.rx_dropped++;
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ } else if (tun->flags & TUN_NOCHECKSUM)
skb->ip_summed = CHECKSUM_UNNECESSARY;
netif_rx_ni(skb);
@@ -289,18 +450,7 @@ static __inline__ ssize_t tun_get_user(s
tun->dev->stats.rx_packets++;
tun->dev->stats.rx_bytes += len;
- return count;
-}
-
-static inline size_t iov_total(const struct iovec *iv, unsigned long count)
-{
- unsigned long i;
- size_t len;
-
- for (i = 0, len = 0; i < count; i++)
- len += iv[i].iov_len;
-
- return len;
+ return tot_len;
}
static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
@@ -313,7 +463,7 @@ static ssize_t tun_chr_aio_write(struct
DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
- return tun_get_user(tun, (struct iovec *) iv, iov_total(iv, count));
+ return tun_get_user(tun, (struct iovec *) iv, count);
}
/* Put packet to the user space buffer */
@@ -336,6 +486,42 @@ static __inline__ ssize_t tun_put_user(s
if (memcpy_toiovec(iv, (void *) &pi, sizeof(pi)))
return -EFAULT;
total += sizeof(pi);
+ }
+ if (tun->flags & TUN_GSO_HDR) {
+ struct virtio_net_hdr gso;
+ struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+ if (skb_is_gso(skb)) {
+ gso.gso_hdr_len = skb_transport_header(skb) - skb->data;
+ gso.gso_size = sinfo->gso_size;
+ if (sinfo->gso_type & SKB_GSO_TCP_ECN)
+ gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4_ECN;
+ else if (sinfo->gso_type & SKB_GSO_TCPV4)
+ gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ else if (sinfo->gso_type & SKB_GSO_TCPV6)
+ gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ else if (sinfo->gso_type & SKB_GSO_UDP)
+ gso.gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ else
+ BUG();
+ } else
+ gso.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ gso.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ gso.csum_start = skb->csum_start - skb_headroom(skb);
+ gso.csum_offset = skb->csum_offset;
+ } else {
+ gso.flags = 0;
+ gso.csum_offset = gso.csum_start = 0;
+ }
+
+ if ((len -= sizeof(gso)) < 0)
+ return -EINVAL;
+
+ if (memcpy_toiovec(iv, (void *)&gso, sizeof(gso)))
+ return -EFAULT;
+ total += sizeof(gso);
}
len = min_t(int, skb->len, len);
@@ -523,6 +709,13 @@ static int tun_set_iff(struct file *file
tun_net_init(dev);
+ /* GSO? One of everything, please. */
+ if (ifr->ifr_flags & IFF_GSO_HDR)
+ dev->features = (NETIF_F_SG | NETIF_F_HW_CSUM
+ | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST
+ | NETIF_F_TSO | NETIF_F_UFO
+ | NETIF_F_TSO_ECN | NETIF_F_TSO6);
+
if (strchr(dev->name, '%')) {
err = dev_alloc_name(dev, dev->name);
if (err < 0)
@@ -543,6 +736,9 @@ static int tun_set_iff(struct file *file
if (ifr->ifr_flags & IFF_ONE_QUEUE)
tun->flags |= TUN_ONE_QUEUE;
+
+ if (ifr->ifr_flags & IFF_GSO_HDR)
+ tun->flags |= TUN_GSO_HDR;
file->private_data = tun;
tun->attached = 1;
diff -r ba3c0eb8741a include/linux/if_tun.h
--- a/include/linux/if_tun.h Wed Jan 16 17:35:25 2008 +1100
+++ b/include/linux/if_tun.h Wed Jan 16 22:11:11 2008 +1100
@@ -70,6 +70,7 @@ struct tun_struct {
#define TUN_NO_PI 0x0040
#define TUN_ONE_QUEUE 0x0080
#define TUN_PERSIST 0x0100
+#define TUN_GSO_HDR 0x0200
/* Ioctl defines */
#define TUNSETNOCSUM _IOW('T', 200, int)
@@ -79,6 +80,7 @@ struct tun_struct {
#define IFF_TAP 0x0002
#define IFF_NO_PI 0x1000
#define IFF_ONE_QUEUE 0x2000
+#define IFF_GSO_HDR 0x4000
struct tun_pi {
unsigned short flags;
[-- Attachment #2: tun_gso_pipe.c --]
[-- Type: text/x-csrc, Size: 8976 bytes --]
#include <signal.h>
#include <stddef.h>
#include <errno.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <net/if.h>
#include <net/ethernet.h>
#include <stdio.h>
#include <string.h>
#include <err.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/uio.h>
#include <linux/sockios.h>
#include <linux/if_tun.h>
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
typedef uint32_t u32;
typedef uint16_t u16;
typedef uint8_t u8;
#ifndef TUNGETFEATURES
#define TUNGETFEATURES _IOR('T', 207, unsigned int)
#endif
#ifndef IFF_GSO_HDR
#define IFF_GSO_HDR 0x4000
#endif
static bool use_gso = true;
/*
 * Write exactly @size bytes from @data to @fd, retrying short writes and
 * writes interrupted by a signal (EINTR).
 *
 * Returns true once everything has been written, false if write() fails
 * with any other error or reports that it wrote nothing.
 */
static bool write_all(int fd, const void *data, unsigned long size)
{
	/* Byte cursor: arithmetic on a void pointer is not standard C. */
	const char *cursor = data;

	while (size) {
		/* write() returns ssize_t; an int could truncate it. */
		ssize_t done = write(fd, cursor, size);

		if (done < 0 && errno == EINTR)
			continue;
		if (done <= 0)
			return false;
		cursor += done;
		size -= (unsigned long)done;
	}
	return true;
}
/*
 * Read exactly @size bytes from @fd into @data, retrying short reads and
 * reads interrupted by a signal (EINTR).
 *
 * Returns true once the buffer has been filled, false on end-of-file or on
 * any other read() error.
 */
static bool read_all(int fd, void *data, unsigned long size)
{
	/* Byte cursor: arithmetic on a void pointer is not standard C. */
	char *cursor = data;

	while (size) {
		/* read() returns ssize_t; an int could truncate it. */
		ssize_t done = read(fd, cursor, size);

		if (done < 0 && errno == EINTR)
			continue;
		if (done <= 0)
			return false;
		cursor += done;
		size -= (unsigned long)done;
	}
	return true;
}
/*
 * Parse a dotted-quad IPv4 address ("a.b.c.d") and return it as a 32-bit
 * value in host byte order, with the first octet in the top byte.
 *
 * Exits with an error message if the string does not start with four
 * dot-separated numbers or if any of them exceeds 255.
 */
static uint32_t str2ip(const char *ipaddr)
{
	unsigned int byte[4];
	int i;

	/* Without this check a short match would leave octets uninitialised. */
	if (sscanf(ipaddr, "%u.%u.%u.%u",
		   &byte[0], &byte[1], &byte[2], &byte[3]) != 4)
		errx(1, "Malformed IPv4 address '%s'", ipaddr);

	for (i = 0; i < 4; i++) {
		/* Larger values would bleed into the neighbouring octet. */
		if (byte[i] > 255)
			errx(1, "Malformed IPv4 address '%s'", ipaddr);
	}

	return (byte[0] << 24) | (byte[1] << 16) | (byte[2] << 8) | byte[3];
}
/*
 * Assign the IPv4 address @ipaddr (host byte order) to the interface named
 * @devname and bring that interface up, using the socket @fd for the ioctls.
 *
 * Exits with an error message if the name does not fit in an ifreq or if
 * either ioctl fails.
 */
static void configure_device(int fd, const char *devname, uint32_t ipaddr)
{
	struct ifreq ifr;
	struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
	size_t namelen = strlen(devname);

	/* ifr_name is a fixed-size array; never copy past its end. */
	if (namelen >= sizeof(ifr.ifr_name))
		errx(1, "Interface name '%s' is too long", devname);

	memset(&ifr, 0, sizeof(ifr));
	memcpy(ifr.ifr_name, devname, namelen + 1);

	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = htonl(ipaddr);
	if (ioctl(fd, SIOCSIFADDR, &ifr) != 0)
		err(1, "Setting %s interface address", devname);

	ifr.ifr_flags = IFF_UP;
	if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0)
		err(1, "Bringing interface %s up", devname);
}
/*
 * Create a tap interface (no packet-info header) and give it the IPv4
 * address @ip.
 *
 * If GSO is wanted, the kernel is first asked via TUNGETFEATURES whether it
 * knows IFF_GSO_HDR; when it does not, a message is printed and the global
 * use_gso is cleared so the rest of the program falls back to plain frames.
 *
 * Returns the open /dev/net/tun descriptor.  Any failure is fatal.
 */
static int setup_tun_net(uint32_t ip)
{
	unsigned int features;
	struct ifreq ifr;
	short tap_flags;
	int netfd, ipfd;

	netfd = open("/dev/net/tun", O_RDWR);
	if (netfd < 0)
		err(1, "Opening /dev/net/tun");

	if (use_gso) {
		/* The feature query is only issued when GSO was requested. */
		bool have_gso = ioctl(netfd, TUNGETFEATURES, &features) == 0
				&& (features & IFF_GSO_HDR);

		if (!have_gso) {
			fprintf(stderr, "No GSO support!\n");
			use_gso = false;
		}
	}

	tap_flags = IFF_TAP | IFF_NO_PI;
	if (use_gso)
		tap_flags |= IFF_GSO_HDR;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = tap_flags;
	/* "%d" asks the kernel to pick the first free tapN name. */
	strcpy(ifr.ifr_name, "tap%d");
	if (ioctl(netfd, TUNSETIFF, &ifr) != 0)
		err(1, "configuring /dev/net/tun");

	/* The interface ioctls need a socket to be issued on; any will do. */
	ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
	if (ipfd < 0)
		err(1, "opening IP socket");

	/* ifr.ifr_name now holds the name the kernel actually assigned. */
	configure_device(ipfd, ifr.ifr_name, ip);
	close(ipfd);

	return netfd;
}
/*
 * Spawn the command @argv and cross-connect it with this process: our
 * stdout feeds the child's stdin, and the child's stdout feeds our stdin.
 *
 * Failure to create the pipes, fork, or redirect our own stdio is fatal.
 * If the child cannot be started it reports the error, kills the parent
 * and exits.
 */
static void two_way_popen(char *const argv[])
{
	int pid;
	int pipe1[2], pipe2[2];

	if (pipe(pipe1) != 0 || pipe(pipe2) != 0)
		err(1, "creating pipe");

	pid = fork();
	if (pid == -1)
		err(1, "forking");

	if (pid == 0) {
		/* We are the child. */
		close(pipe1[1]);
		close(pipe2[0]);
		if (dup2(pipe1[0], STDIN_FILENO) >= 0 &&
		    dup2(pipe2[1], STDOUT_FILENO) >= 0)
			execvp(argv[0], argv);

		/* Only reached if redirection or exec failed. */
		fprintf(stderr, "Failed to exec '%s': %m\n", argv[0]);
		kill(getppid(), SIGKILL);
		/* Never fall through into the parent's code path. */
		_exit(1);
	}

	/* We are parent. */
	close(pipe1[0]);
	close(pipe2[1]);
	if (dup2(pipe1[1], STDOUT_FILENO) < 0 ||
	    dup2(pipe2[0], STDIN_FILENO) < 0)
		err(1, "redirecting stdio to child");
}
/* Bit values for virtio_net_hdr.flags */
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */

/* Values for virtio_net_hdr.gso_type */
#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
/* FIXME: Do we need this? If they said they can handle ECN, do they care? */
#define VIRTIO_NET_HDR_GSO_TCPV4_ECN 2 /* GSO frame, IPv4 TCP w/ ECN */
#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */

/*
 * Metadata header that precedes every frame read from or written to the tap
 * device when IFF_GSO_HDR is in effect.  Local copy of the layout used by
 * the accompanying kernel patch; field order and widths must not change.
 */
struct virtio_net_hdr {
	__u8 flags;		/* VIRTIO_NET_HDR_F_* */
	__u8 gso_type;		/* VIRTIO_NET_HDR_GSO_* */
	__u16 gso_hdr_len;	/* Ethernet + IP + tcp/udp hdrs */
	__u16 gso_size;		/* Bytes to append to gso_hdr_len per frame */
	__u16 csum_start;	/* Position to start checksumming from */
	__u16 csum_offset;	/* Offset after that to place checksum */
};
/*
 * One frame as exchanged with the tap device: the GSO metadata header
 * followed by the Ethernet header, the IPv4 header and the transport header
 * or payload.  The struct is packed so that its fields line up with the
 * bytes on the wire.  When GSO headers are not in use, I/O starts at the
 * `mac` member instead of at the start of the struct.
 */
struct packet
{
/* GSO/checksum metadata; only transferred when IFF_GSO_HDR is active. */
struct virtio_net_hdr gso;
struct ether_header mac;
struct iphdr ip;
/* Transport header views; these assume the IP header carries no options. */
union {
struct icmphdr icmp;
struct tcphdr tcp;
struct udphdr udp;
/* Sizes the union for the largest frame; 34 is presumably the 14-byte
 * Ethernet header plus the 20-byte IP header -- TODO confirm. */
char pad[65535 - 34];
};
} __attribute__((packed));
/*
 * Reduce a wider ones'-complement accumulator to 16 bits by adding the bits
 * above bit 15 back into the low 16 bits.  Two rounds are performed: the
 * first may itself carry into bit 16, the second absorbs that carry.
 */
static inline unsigned short from32to16(unsigned long x)
{
	int round;

	for (round = 0; round < 2; round++)
		x = (x >> 16) + (x & 0xffff);

	return (unsigned short)x;
}
/*
 * Fold @sum down to 16 bits and return its bitwise complement.  The
 * complement is taken after integer promotion, so the bits above bit 15 of
 * the result are set; callers that want a 16-bit checksum truncate it.
 */
static unsigned int csum_fold(unsigned int sum)
{
	unsigned short folded = from32to16(sum);

	return ~folded;
}
/*
 * Add up @len bytes starting at @buff as 16-bit words, folding carries back
 * into the sum, and return the folded 16-bit result (not complemented).
 * Returns 0 when @len is zero or negative.
 *
 * The buffer is consumed in stages so that wider loads only happen on
 * suitably aligned addresses: an optional leading byte, an optional leading
 * 16-bit word, a run of 32-bit words, then an optional trailing 16-bit word
 * and an optional trailing byte.
 *
 * NOTE(review): the single-byte cases (leading byte added unshifted, trailing
 * byte added as `*buff << 8`) look like the big-endian convention -- confirm
 * before relying on odd lengths or odd start addresses on a little-endian
 * host.  The only caller visible here passes an 8-byte array of 32-bit words.
 */
static unsigned long do_csum(const unsigned char * buff, int len)
{
int odd, count;
unsigned long result = 0;
if (len <= 0)
return 0;
/* Does the buffer start on an odd address? */
odd = 1 & (unsigned long) buff;
if (odd) {
/* Take the first byte on its own so the rest is 2-byte aligned. */
result = *buff;
len--;
buff++;
}
count = len >> 1; /* nr of 16-bit words.. */
if (count) {
/* Consume one 16-bit word if needed to reach 4-byte alignment. */
if (2 & (unsigned long) buff) {
result += *(unsigned short *) buff;
count--;
len -= 2;
buff += 2;
}
count >>= 1; /* nr of 32-bit words.. */
if (count) {
unsigned long carry = 0;
do {
unsigned int w = *(unsigned int *) buff;
count--;
buff += 4;
/* Add the carry of the previous round, then this word. */
result += carry;
result += w;
/* A sum smaller than the addend means the addition wrapped. */
carry = (w > result);
} while (count);
result += carry;
/* First fold of the high bits into the low 16. */
result = (result & 0xffff) + (result >> 16);
}
/* Leftover 16-bit word after the 32-bit run. */
if (len & 2) {
result += *(unsigned short *) buff;
buff += 2;
}
}
/* Leftover single byte at the very end. */
if (len & 1)
result += (*buff << 8);
result = from32to16(result);
/* An odd start shifted every byte by one position; swap the halves back. */
if (odd)
result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
return result;
}
/*
 * Checksum @len bytes at @buff and add the running checksum @sum to it,
 * wrapping a 32-bit overflow back in as an end-around carry.
 */
static unsigned int csum_partial(const void * buff, int len, unsigned int sum)
{
	unsigned int total = do_csum(buff, len);

	total += sum;
	/* A total smaller than what was added means the addition wrapped. */
	return (total < sum) ? total + 1 : total;
}
/*
 * Incrementally update the 16-bit checksum at @sum for a 32-bit field whose
 * value changed from @from to @to: the complement of the old value and the
 * new value are summed on top of the un-complemented current checksum, and
 * the folded, complemented result is stored back.
 */
static void csum_replace(__u16 *sum, u32 from, u32 to)
{
	u32 delta[2];
	unsigned int running;

	delta[0] = ~from;
	delta[1] = to;

	running = csum_partial(delta, sizeof(delta), *sum ^ 0xFFFF);
	*sum = csum_fold(running);
}
/*
 * Expand to the four bytes of @addr, in memory order, as a comma-separated
 * argument list (for use with a "%u.%u.%u.%u" style format).  @addr must be
 * an lvalue; it is parenthesised so that any lvalue expression is safe.
 */
#define NIPQUAD(addr) \
	((unsigned char *)&(addr))[0], \
	((unsigned char *)&(addr))[1], \
	((unsigned char *)&(addr))[2], \
	((unsigned char *)&(addr))[3]
/*
 * Rewrite the source and destination IPv4 addresses of @packet to @src and
 * @dst (both in network byte order, as stored in the IP header) and patch
 * the checksums that cover them.  Frames that are not IPv4 are left alone.
 *
 * The IP header checksum is always updated.  The TCP/UDP checksum is only
 * updated when the transport header really is where struct packet expects
 * it, and when there is a checksum to update.
 */
static void nat_packet(struct packet *packet, u32 src, u32 dst)
{
	u32 oldsrc, olddst;

	if (packet->mac.ether_type != htons(ETHERTYPE_IP))
		return;

	oldsrc = packet->ip.saddr;
	olddst = packet->ip.daddr;
	packet->ip.saddr = src;
	packet->ip.daddr = dst;
	csum_replace(&packet->ip.check, oldsrc, src);
	csum_replace(&packet->ip.check, olddst, dst);

	/* With IP options the transport header is not directly after ip. */
	if (packet->ip.ihl * 4 != sizeof(packet->ip))
		return;

	/* Non-first fragments (offset bits 0x1fff) carry no transport header. */
	if (packet->ip.frag_off & htons(0x1fff))
		return;

	switch (packet->ip.protocol) {
	case IPPROTO_TCP:
		csum_replace(&packet->tcp.check, oldsrc, src);
		csum_replace(&packet->tcp.check, olddst, dst);
		break;
	case IPPROTO_UDP:
		/* A zero UDP checksum means "none"; it must stay zero. */
		if (packet->udp.check == 0)
			break;
		csum_replace(&packet->udp.check, oldsrc, src);
		csum_replace(&packet->udp.check, olddst, dst);
		break;
	default:
		break;
	}
}
int main(int argc, char *argv[])
{
int netfd;
__u32 natdst, natsrc;
int size;
struct packet packet;
void *buf;
if (argv[1] && strcmp(argv[1], "--no-gso") == 0) {
argv++;
argc--;
use_gso = false;
}
if (argc < 4)
errx(1, "Usage: %s [--no-gso] ip-addr src-nat-addr dst-nat-addr [command-to-open...]", argv[0]);
netfd = setup_tun_net(str2ip(argv[1]));
natsrc = htonl(str2ip(argv[2]));
natdst = htonl(str2ip(argv[3]));
/* Eg. ssh othermachine /root/tun_gso_pipe 192.168.1.2 192.168.5.2 192.158.5.1 */
if (argc > 4)
two_way_popen(argv+4);
if (use_gso)
buf = &packet;
else
buf = &packet.mac;
for (;;) {
fd_set fds;
FD_ZERO(&fds);
FD_SET(netfd, &fds);
FD_SET(STDIN_FILENO, &fds);
select(netfd+1, &fds, NULL, NULL, NULL);
if (FD_ISSET(netfd, &fds)) {
size = read(netfd, buf, sizeof(packet));
if (size <= 0)
err(1, "Reading netfd");
if (use_gso)
fprintf(stderr, "Read %u, gso = %u/%u\n", size,
packet.gso.gso_type,
packet.gso.gso_size);
nat_packet(&packet, natsrc, natdst);
if (!write_all(STDOUT_FILENO, &size, sizeof(size))
|| !write_all(STDOUT_FILENO, buf, size))
err(1, "Writing data to stdout");
}
if (FD_ISSET(STDIN_FILENO, &fds)) {
int ret;
if (!read_all(STDIN_FILENO, &size, sizeof(size)))
err(1, "Reading stdin");
if (!read_all(STDIN_FILENO, buf, size))
err(1, "Reading %u byte packet", size);
fprintf(stderr, "Writing %u, gso = %u/%u\n", size,
packet.gso.gso_type,
packet.gso.gso_size);
ret = write(netfd, buf, size);
if (ret != size)
err(1, "Writing data to netfd gave %i/%i",
ret, size);
}
}
}
[-- Attachment #3: tun_gso_pipe-setup.sh --]
[-- Type: application/x-shellscript, Size: 794 bytes --]
^ permalink raw reply [flat|nested] 2+ messages in thread
* [PATCH] Interface to query tun/tap features.
2008-01-16 12:06 [PATCH] TUN/TAP GSO/partial csum support Rusty Russell
@ 2008-01-16 12:07 ` Rusty Russell
0 siblings, 0 replies; 2+ messages in thread
From: Rusty Russell @ 2008-01-16 12:07 UTC (permalink / raw)
To: netdev; +Cc: Herbert Xu, Max Krasnyansky
The problem with introducing IFF_GSO_HDR is that it needs to set dev->features
(to enable GSO, checksumming, etc), which is supposed to be done before
register_netdevice(), ie. as part of TUNSETIFF.
Unfortunately, TUNSETIFF has always just ignored flags it doesn't understand,
so there's no good way of detecting whether the kernel supports IFF_GSO_HDR.
This patch implements a TUNGETFEATURES ioctl which returns all the valid IFF
flags. It could be extended later to include other features.
Here's an example program which uses it:
#include <linux/if_tun.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <err.h>
#include <stdio.h>
/*
 * Table mapping each TUNSETIFF flag bit this program knows about to the
 * name printed for it.  Feature bits not listed here are reported by main()
 * as unknown.
 */
static struct {
unsigned int flag; /* IFF_* bit as reported by TUNGETFEATURES */
const char *name; /* label printed when that bit is set */
} known_flags[] = {
{ IFF_TUN, "TUN" },
{ IFF_TAP, "TAP" },
{ IFF_NO_PI, "NO_PI" },
{ IFF_ONE_QUEUE, "ONE_QUEUE" },
{ IFF_GSO_HDR, "GSO_HDR" },
};
/*
 * Ask the tun driver which TUNSETIFF flags it supports and print them by
 * name.  If the kernel lacks TUNGETFEATURES, fall back to the flags that
 * existed before that ioctl was introduced.  Bits without a known name are
 * printed in hex at the end.
 */
int main()
{
	const unsigned int nflags = sizeof(known_flags)/sizeof(known_flags[0]);
	unsigned int features, i;
	int netfd;

	netfd = open("/dev/net/tun", O_RDWR);
	if (netfd < 0)
		err(1, "Opening /dev/net/tun");

	if (ioctl(netfd, TUNGETFEATURES, &features) != 0) {
		printf("Kernel does not support TUNGETFEATURES, guessing\n");
		features = (IFF_TUN|IFF_TAP|IFF_NO_PI|IFF_ONE_QUEUE);
	}

	printf("Available features are: ");
	i = 0;
	while (i < nflags) {
		unsigned int bit = known_flags[i].flag;

		if (features & bit) {
			/* Clear each named bit so only unknown ones remain. */
			features &= ~bit;
			printf("%s ", known_flags[i].name);
		}
		i++;
	}

	if (features != 0)
		printf("(UNKNOWN %#x)", features);
	printf("\n");

	return 0;
}
---
drivers/net/tun.c | 9 +++++++++
include/linux/if_tun.h | 2 ++
2 files changed, 11 insertions(+)
diff -r ba3c0eb8741a drivers/net/tun.c
--- a/drivers/net/tun.c Wed Jan 16 17:35:25 2008 +1100
+++ b/drivers/net/tun.c Wed Jan 16 22:11:11 2008 +1100
@@ -583,6 +779,15 @@ static int tun_chr_ioctl(struct inode *i
if (copy_to_user(argp, &ifr, sizeof(ifr)))
return -EFAULT;
return 0;
+ }
+
+ if (cmd == TUNGETFEATURES) {
+ /* Currently this just means: "what IFF flags are valid?".
+ * This is needed because we never checked for invalid flags on
+ * TUNSETIFF. This was introduced with IFF_GSO_HDR, so if a
+ * kernel doesn't have this ioctl, it doesn't have GSO header
+ * support. */
+ return put_user(IFF_ALL_FLAGS, (unsigned int __user*)argp);
}
if (!tun)
diff -r ba3c0eb8741a include/linux/if_tun.h
--- a/include/linux/if_tun.h Wed Jan 16 17:35:25 2008 +1100
+++ b/include/linux/if_tun.h Wed Jan 16 22:11:11 2008 +1100
@@ -79,13 +80,15 @@ struct tun_struct {
#define TUNSETOWNER _IOW('T', 204, int)
#define TUNSETLINK _IOW('T', 205, int)
#define TUNSETGROUP _IOW('T', 206, int)
+#define TUNGETFEATURES _IOR('T', 207, unsigned int)
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
#define IFF_TAP 0x0002
#define IFF_NO_PI 0x1000
#define IFF_ONE_QUEUE 0x2000
#define IFF_GSO_HDR 0x4000
+#define IFF_ALL_FLAGS (IFF_TUN|IFF_TAP|IFF_NO_PI|IFF_ONE_QUEUE|IFF_GSO_HDR)
struct tun_pi {
unsigned short flags;
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2008-01-16 12:08 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-16 12:06 [PATCH] TUN/TAP GSO/partial csum support Rusty Russell
2008-01-16 12:07 ` [PATCH] Interface to query tun/tap features Rusty Russell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).