virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
From: Jason Wang <jasowang@redhat.com>
To: krkumar2@in.ibm.com, kvm@vger.kernel.org, mst@redhat.com,
	netdev@vger.kernel.org, rusty@rustcorp.com.au,
	virtualization@lists.linux-foundation.org,
	levinsasha928@gmail.com, bhutchings@solarflare.com
Subject: [net-next RFC PATCH 2/5] tuntap: simple flow director support
Date: Mon, 05 Dec 2011 16:58:57 +0800	[thread overview]
Message-ID: <20111205085857.6116.99252.stgit@dhcp-8-146.nay.redhat.com> (raw)
In-Reply-To: <20111205085603.6116.65101.stgit@dhcp-8-146.nay.redhat.com>

This patch adds a simple flow director to the tun/tap device. It is just
a page that contains the hash-to-queue mapping, which can be changed by
user-space. The backend (tap/macvtap) queries this table to get the
desired queue of a packet when it sends packets to userspace.

The page address is set through a new ioctl, TUNSETFD, and the page
stays pinned until the device exits or another page is specified.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c      |   63 ++++++++++++++++++++++++++++++++++++++++--------
 include/linux/if_tun.h |   10 ++++++++
 2 files changed, 62 insertions(+), 11 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 7d22b4b..2efaf81 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -64,6 +64,7 @@
 #include <linux/nsproxy.h>
 #include <linux/virtio_net.h>
 #include <linux/rcupdate.h>
+#include <linux/highmem.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
@@ -109,6 +110,7 @@ struct tap_filter {
 };
 
 #define MAX_TAP_QUEUES (NR_CPUS < 16 ? NR_CPUS : 16)
+#define TAP_HASH_MASK  0xFF
 
 struct tun_file {
 	struct sock sk;
@@ -128,6 +130,7 @@ struct tun_sock;
 
 struct tun_struct {
 	struct tun_file		*tfiles[MAX_TAP_QUEUES];
+	struct page             *fd_page[1];
 	unsigned int            numqueues;
 	unsigned int 		flags;
 	uid_t			owner;
@@ -156,7 +159,7 @@ static struct tun_file *tun_get_queue(struct net_device *dev,
 	struct tun_struct *tun = netdev_priv(dev);
 	struct tun_file *tfile = NULL;
 	int numqueues = tun->numqueues;
-	__u32 rxq;
+	__u32 rxq, rxhash;
 
 	BUG_ON(!rcu_read_lock_held());
 
@@ -168,6 +171,22 @@ static struct tun_file *tun_get_queue(struct net_device *dev,
 		goto out;
 	}
 
+	rxhash = skb_get_rxhash(skb);
+	if (rxhash) {
+		if (tun->fd_page[0]) {
+			u16 *table = kmap_atomic(tun->fd_page[0]);
+			rxq = table[rxhash & TAP_HASH_MASK];
+			kunmap_atomic(table);
+			if (rxq < numqueues) {
+				tfile = rcu_dereference(tun->tfiles[rxq]);
+				goto out;
+			}
+		}
+		rxq = ((u64)rxhash * numqueues) >> 32;
+		tfile = rcu_dereference(tun->tfiles[rxq]);
+		goto out;
+	}
+
 	if (likely(skb_rx_queue_recorded(skb))) {
 		rxq = skb_get_rx_queue(skb);
 
@@ -178,14 +197,6 @@ static struct tun_file *tun_get_queue(struct net_device *dev,
 		goto out;
 	}
 
-	/* Check if we can use flow to select a queue */
-	rxq = skb_get_rxhash(skb);
-	if (rxq) {
-		u32 idx = ((u64)rxq * numqueues) >> 32;
-		tfile = rcu_dereference(tun->tfiles[idx]);
-		goto out;
-	}
-
 	tfile = rcu_dereference(tun->tfiles[0]);
 out:
 	return tfile;
@@ -1020,6 +1031,14 @@ out:
 	return ret;
 }
 
+static void tun_destructor(struct net_device *dev)	/* netdev destructor: release the flow director page, then free dev */
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	if (tun->fd_page[0])
+		put_page(tun->fd_page[0]);	/* drop the page reference obtained in the TUNSETFD ioctl */
+	free_netdev(dev);	/* same call the previous destructor (free_netdev) performed */
+}
+
 static void tun_setup(struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
@@ -1028,7 +1047,7 @@ static void tun_setup(struct net_device *dev)
 	tun->group = -1;
 
 	dev->ethtool_ops = &tun_ethtool_ops;
-	dev->destructor = free_netdev;
+	dev->destructor = tun_destructor;
 }
 
 /* Trivial set of netlink ops to allow deleting tun or tap
@@ -1230,6 +1249,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		tun = netdev_priv(dev);
 		tun->dev = dev;
 		tun->flags = flags;
+		tun->fd_page[0] = NULL;
 
 		security_tun_dev_post_create(&tfile->sk);
 
@@ -1353,6 +1373,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	struct net_device *dev = NULL;
 	void __user* argp = (void __user*)arg;
 	struct ifreq ifr;
+	struct tun_fd tfd;
 	int ret;
 
 	if (cmd == TUNSETIFF || cmd == TUNATTACHQUEUE || _IOC_TYPE(cmd) == 0x89)
@@ -1364,7 +1385,8 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		 * This is needed because we never checked for invalid flags on
 		 * TUNSETIFF. */
 		return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-				IFF_VNET_HDR | IFF_MULTI_QUEUE | IFF_RXHASH,
+				IFF_VNET_HDR | IFF_MULTI_QUEUE | IFF_RXHASH |
+				IFF_FD,
 				(unsigned int __user*)argp);
 	}
 
@@ -1476,6 +1498,25 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		ret = set_offload(tun, arg);
 		break;
 
+	case TUNSETFD:
+		if (copy_from_user(&tfd, argp, sizeof(tfd)))
+			ret = -EFAULT;
+		else {
+			if (tun->fd_page[0]) {
+				put_page(tun->fd_page[0]);
+				tun->fd_page[0] = NULL;
+			}
+
+			/* put_page() in tun_destructor() */
+			if (get_user_pages_fast(tfd.addr, 1, 0,
+						&tun->fd_page[0]) != 1)
+				ret = -EFAULT;
+			else
+				ret = 0;
+		}
+
+		break;
+
 	case SIOCGIFHWADDR:
 		/* Get hw address */
 		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index a1f6f3f..726731d 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -36,6 +36,8 @@
 #define TUN_VNET_HDR 	0x0200
 #define TUN_TAP_MQ      0x0400
 
+struct tun_fd;
+
 /* Ioctl defines */
 #define TUNSETNOCSUM  _IOW('T', 200, int) 
 #define TUNSETDEBUG   _IOW('T', 201, int) 
@@ -56,6 +58,7 @@
 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
 #define TUNATTACHQUEUE  _IOW('T', 217, int)
 #define TUNDETACHQUEUE  _IOW('T', 218, int)
+#define TUNSETFD        _IOW('T', 219, struct tun_fd)
 
 
 /* TUNSETIFF ifr flags */
@@ -67,6 +70,7 @@
 #define IFF_TUN_EXCL	0x8000
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_RXHASH      0x0200
+#define IFF_FD          0x0400
 
 /* Features for GSO (TUNSETOFFLOAD). */
 #define TUN_F_CSUM	0x01	/* You can hand me unchecksummed packets. */
@@ -97,6 +101,12 @@ struct tun_filter {
 	__u8   addr[0][ETH_ALEN];
 };
 
+/* Programmable flow director: argument structure of the TUNSETFD ioctl */
+struct tun_fd {
+	unsigned long addr;	/* user-space address of the table page; passed to get_user_pages_fast() */
+	size_t size;		/* NOTE(review): not read by the TUNSETFD handler in this patch - confirm intended use */
+};
+
 #ifdef __KERNEL__
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);

  parent reply	other threads:[~2011-12-05  8:58 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-12-05  8:58 [net-next RFC PATCH 0/5] Series short description Jason Wang
2011-12-05  8:58 ` [net-next RFC PATCH 1/5] virtio_net: passing rxhash through vnet_hdr Jason Wang
2011-12-05  8:58 ` Jason Wang [this message]
2011-12-05 10:38   ` [net-next RFC PATCH 2/5] tuntap: simple flow director support Stefan Hajnoczi
2011-12-05 20:09   ` Ben Hutchings
     [not found]   ` <1323115763.2887.12.camel@bwh-desktop>
2011-12-06  7:21     ` Jason Wang
2011-12-06 17:31       ` Ben Hutchings
2011-12-05  8:59 ` [net-next RFC PATCH 3/5] macvtap: " Jason Wang
2011-12-05 20:11   ` Ben Hutchings
2011-12-05  8:59 ` [net-next RFC PATCH 4/5] virtio: introduce a method to get the irq of a specific virtqueue Jason Wang
2011-12-05  8:59 ` [net-next RFC PATCH 5/5] virtio-net: flow director support Jason Wang
     [not found] ` <20111205085925.6116.94352.stgit@dhcp-8-146.nay.redhat.com>
2011-12-05 10:55   ` Stefan Hajnoczi
2011-12-06  6:33     ` Jason Wang
2011-12-06  9:18       ` Stefan Hajnoczi
     [not found]       ` <CAJSP0QX5dDkpX+cRcQut2mb6K91zeqGLRrZBGAWT_r2p685gaQ@mail.gmail.com>
2011-12-06 10:21         ` Jason Wang
2011-12-06 13:15           ` Stefan Hajnoczi
     [not found]           ` <CAJSP0QXsLwvH5xYj6h0E_V4VLg6DuUc-GKXu9esEYzL2MFcFGw@mail.gmail.com>
2011-12-06 15:42             ` Sridhar Samudrala
     [not found]             ` <4EDE37FE.5090409@us.ibm.com>
2011-12-06 16:14               ` Michael S. Tsirkin
2011-12-06 23:10                 ` Sridhar Samudrala
2011-12-07 11:05                   ` Jason Wang
2011-12-07 11:02               ` Jason Wang
2011-12-09  2:00                 ` Sridhar Samudrala
2011-12-07  3:03             ` Jason Wang
2011-12-07  9:08               ` Stefan Hajnoczi
2011-12-07 12:10                 ` Jason Wang
2011-12-07 15:04                   ` Stefan Hajnoczi
2011-12-05 20:42   ` Ben Hutchings
2011-12-06  7:25     ` Jason Wang
2011-12-06 17:36       ` Ben Hutchings
2011-12-07  7:30 ` [net-next RFC PATCH 0/5] Series short description Rusty Russell
     [not found] ` <87ty5cj0sw.fsf@rustcorp.com.au>
2011-12-07 11:31   ` Jason Wang
2011-12-07 17:02     ` Ben Hutchings
2011-12-08 10:06       ` Jason Wang
2011-12-09  5:31       ` Rusty Russell
2011-12-15  1:36         ` Ben Hutchings
2011-12-15 23:12           ` Rusty Russell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20111205085857.6116.99252.stgit@dhcp-8-146.nay.redhat.com \
    --to=jasowang@redhat.com \
    --cc=bhutchings@solarflare.com \
    --cc=krkumar2@in.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=levinsasha928@gmail.com \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=rusty@rustcorp.com.au \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).