From: Jason Wang <jasowang@redhat.com>
To: krkumar2@in.ibm.com, kvm@vger.kernel.org, mst@redhat.com,
netdev@vger.kernel.org, rusty@rustcorp.com.au,
virtualization@lists.linux-foundation.org,
levinsasha928@gmail.com, bhutchings@solarflare.com
Subject: [net-next RFC PATCH 2/5] tuntap: simple flow director support
Date: Mon, 05 Dec 2011 16:58:57 +0800 [thread overview]
Message-ID: <20111205085857.6116.99252.stgit@dhcp-8-146.nay.redhat.com> (raw)
In-Reply-To: <20111205085603.6116.65101.stgit@dhcp-8-146.nay.redhat.com>
This patch adds a simple flow director to the tun/tap device. It is just a
page that contains the hash-to-queue mapping, which can be changed by
user-space. The backend (tap/macvtap) queries this table to get
the desired queue of a packet when it sends packets to userspace.
The page address is set through a new ioctl, TUNSETFD, and the page
is pinned until the device exits or another page is specified.
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
drivers/net/tun.c | 63 ++++++++++++++++++++++++++++++++++++++++--------
include/linux/if_tun.h | 10 ++++++++
2 files changed, 62 insertions(+), 11 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7d22b4b..2efaf81 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -64,6 +64,7 @@
#include <linux/nsproxy.h>
#include <linux/virtio_net.h>
#include <linux/rcupdate.h>
+#include <linux/highmem.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
@@ -109,6 +110,7 @@ struct tap_filter {
};
#define MAX_TAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16)
+#define TAP_HASH_MASK 0xFF
struct tun_file {
struct sock sk;
@@ -128,6 +130,7 @@ struct tun_sock;
struct tun_struct {
struct tun_file *tfiles[MAX_TAP_QUEUES];
+ struct page *fd_page[1];
unsigned int numqueues;
unsigned int flags;
uid_t owner;
@@ -156,7 +159,7 @@ static struct tun_file *tun_get_queue(struct net_device *dev,
struct tun_struct *tun = netdev_priv(dev);
struct tun_file *tfile = NULL;
int numqueues = tun->numqueues;
- __u32 rxq;
+ __u32 rxq, rxhash;
BUG_ON(!rcu_read_lock_held());
@@ -168,6 +171,22 @@ static struct tun_file *tun_get_queue(struct net_device *dev,
goto out;
}
+ rxhash = skb_get_rxhash(skb);
+ if (rxhash) {
+ if (tun->fd_page[0]) {
+ u16 *table = kmap_atomic(tun->fd_page[0]);
+ rxq = table[rxhash & TAP_HASH_MASK];
+ kunmap_atomic(table);
+ if (rxq < numqueues) {
+ tfile = rcu_dereference(tun->tfiles[rxq]);
+ goto out;
+ }
+ }
+ rxq = ((u64)rxhash * numqueues) >> 32;
+ tfile = rcu_dereference(tun->tfiles[rxq]);
+ goto out;
+ }
+
if (likely(skb_rx_queue_recorded(skb))) {
rxq = skb_get_rx_queue(skb);
@@ -178,14 +197,6 @@ static struct tun_file *tun_get_queue(struct net_device *dev,
goto out;
}
- /* Check if we can use flow to select a queue */
- rxq = skb_get_rxhash(skb);
- if (rxq) {
- u32 idx = ((u64)rxq * numqueues) >> 32;
- tfile = rcu_dereference(tun->tfiles[idx]);
- goto out;
- }
-
tfile = rcu_dereference(tun->tfiles[0]);
out:
return tfile;
@@ -1020,6 +1031,14 @@ out:
return ret;
}
+static void tun_destructor(struct net_device *dev)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+ if (tun->fd_page[0])
+ put_page(tun->fd_page[0]);
+ free_netdev(dev);
+}
+
static void tun_setup(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
@@ -1028,7 +1047,7 @@ static void tun_setup(struct net_device *dev)
tun->group = -1;
dev->ethtool_ops = &tun_ethtool_ops;
- dev->destructor = free_netdev;
+ dev->destructor = tun_destructor;
}
/* Trivial set of netlink ops to allow deleting tun or tap
@@ -1230,6 +1249,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
tun = netdev_priv(dev);
tun->dev = dev;
tun->flags = flags;
+ tun->fd_page[0] = NULL;
security_tun_dev_post_create(&tfile->sk);
@@ -1353,6 +1373,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
struct net_device *dev = NULL;
void __user* argp = (void __user*)arg;
struct ifreq ifr;
+ struct tun_fd tfd;
int ret;
if (cmd == TUNSETIFF || cmd == TUNATTACHQUEUE || _IOC_TYPE(cmd) == 0x89)
@@ -1364,7 +1385,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
* This is needed because we never checked for invalid flags on
* TUNSETIFF. */
return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
- IFF_VNET_HDR | IFF_MULTI_QUEUE | IFF_RXHASH,
+ IFF_VNET_HDR | IFF_MULTI_QUEUE | IFF_RXHASH |
+ IFF_FD,
(unsigned int __user*)argp);
}
@@ -1476,6 +1498,25 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
ret = set_offload(tun, arg);
break;
+ case TUNSETFD:
+ if (copy_from_user(&tfd, argp, sizeof(tfd)))
+ ret = -EFAULT;
+ else {
+ if (tun->fd_page[0]) {
+ put_page(tun->fd_page[0]);
+ tun->fd_page[0] = NULL;
+ }
+
+ /* put_page() in tun_destructor() */
+ if (get_user_pages_fast(tfd.addr, 1, 0,
+ &tun->fd_page[0]) != 1)
+ ret = -EFAULT;
+ else
+ ret = 0;
+ }
+
+ break;
+
case SIOCGIFHWADDR:
/* Get hw address */
memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index a1f6f3f..726731d 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -36,6 +36,8 @@
#define TUN_VNET_HDR 0x0200
#define TUN_TAP_MQ 0x0400
+struct tun_fd;
+
/* Ioctl defines */
#define TUNSETNOCSUM _IOW('T', 200, int)
#define TUNSETDEBUG _IOW('T', 201, int)
@@ -56,6 +58,7 @@
#define TUNSETVNETHDRSZ _IOW('T', 216, int)
#define TUNATTACHQUEUE _IOW('T', 217, int)
#define TUNDETACHQUEUE _IOW('T', 218, int)
+#define TUNSETFD _IOW('T', 219, struct tun_fd)
/* TUNSETIFF ifr flags */
@@ -67,6 +70,7 @@
#define IFF_TUN_EXCL 0x8000
#define IFF_MULTI_QUEUE 0x0100
#define IFF_RXHASH 0x0200
+#define IFF_FD 0x0400
/* Features for GSO (TUNSETOFFLOAD). */
#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */
@@ -97,6 +101,12 @@ struct tun_filter {
__u8 addr[0][ETH_ALEN];
};
+/* Programmable flow director */
+struct tun_fd {
+ unsigned long addr;
+ size_t size;
+};
+
#ifdef __KERNEL__
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
struct socket *tun_get_socket(struct file *);
next prev parent reply other threads:[~2011-12-05 8:58 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-12-05 8:58 [net-next RFC PATCH 0/5] Series short description Jason Wang
2011-12-05 8:58 ` [net-next RFC PATCH 1/5] virtio_net: passing rxhash through vnet_hdr Jason Wang
2011-12-05 8:58 ` Jason Wang [this message]
2011-12-05 10:38 ` [net-next RFC PATCH 2/5] tuntap: simple flow director support Stefan Hajnoczi
2011-12-05 20:09 ` Ben Hutchings
[not found] ` <1323115763.2887.12.camel@bwh-desktop>
2011-12-06 7:21 ` Jason Wang
2011-12-06 17:31 ` Ben Hutchings
2011-12-05 8:59 ` [net-next RFC PATCH 3/5] macvtap: " Jason Wang
2011-12-05 20:11 ` Ben Hutchings
2011-12-05 8:59 ` [net-next RFC PATCH 4/5] virtio: introduce a method to get the irq of a specific virtqueue Jason Wang
2011-12-05 8:59 ` [net-next RFC PATCH 5/5] virtio-net: flow director support Jason Wang
[not found] ` <20111205085925.6116.94352.stgit@dhcp-8-146.nay.redhat.com>
2011-12-05 10:55 ` Stefan Hajnoczi
2011-12-06 6:33 ` Jason Wang
2011-12-06 9:18 ` Stefan Hajnoczi
[not found] ` <CAJSP0QX5dDkpX+cRcQut2mb6K91zeqGLRrZBGAWT_r2p685gaQ@mail.gmail.com>
2011-12-06 10:21 ` Jason Wang
2011-12-06 13:15 ` Stefan Hajnoczi
[not found] ` <CAJSP0QXsLwvH5xYj6h0E_V4VLg6DuUc-GKXu9esEYzL2MFcFGw@mail.gmail.com>
2011-12-06 15:42 ` Sridhar Samudrala
[not found] ` <4EDE37FE.5090409@us.ibm.com>
2011-12-06 16:14 ` Michael S. Tsirkin
2011-12-06 23:10 ` Sridhar Samudrala
2011-12-07 11:05 ` Jason Wang
2011-12-07 11:02 ` Jason Wang
2011-12-09 2:00 ` Sridhar Samudrala
2011-12-07 3:03 ` Jason Wang
2011-12-07 9:08 ` Stefan Hajnoczi
2011-12-07 12:10 ` Jason Wang
2011-12-07 15:04 ` Stefan Hajnoczi
2011-12-05 20:42 ` Ben Hutchings
2011-12-06 7:25 ` Jason Wang
2011-12-06 17:36 ` Ben Hutchings
2011-12-07 7:30 ` [net-next RFC PATCH 0/5] Series short description Rusty Russell
[not found] ` <87ty5cj0sw.fsf@rustcorp.com.au>
2011-12-07 11:31 ` Jason Wang
2011-12-07 17:02 ` Ben Hutchings
2011-12-08 10:06 ` Jason Wang
2011-12-09 5:31 ` Rusty Russell
2011-12-15 1:36 ` Ben Hutchings
2011-12-15 23:12 ` Rusty Russell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20111205085857.6116.99252.stgit@dhcp-8-146.nay.redhat.com \
--to=jasowang@redhat.com \
--cc=bhutchings@solarflare.com \
--cc=krkumar2@in.ibm.com \
--cc=kvm@vger.kernel.org \
--cc=levinsasha928@gmail.com \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=rusty@rustcorp.com.au \
--cc=virtualization@lists.linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).