netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/10] Tun fixes and netns migration
@ 2009-01-20 20:53 Eric W. Biederman
  2009-01-20 20:56 ` [PATCH 01/10] tun: Remove unnecessary tun_get_by_name Eric W. Biederman
  2009-01-22  0:03 ` [PATCH 0/10] Tun fixes and netns migration David Miller
  0 siblings, 2 replies; 11+ messages in thread
From: Eric W. Biederman @ 2009-01-20 20:53 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Max Krasnyansky, Pavel Emelyanov


Having a tun device that can be migrated between network namespaces 
is interesting when setting up creative network topologies using
network namespaces.

To get there requires some general clean and restructuring of the
tun driver.

Eric

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 01/10] tun: Remove unnecessary tun_get_by_name
  2009-01-20 20:53 [PATCH 0/10] Tun fixes and netns migration Eric W. Biederman
@ 2009-01-20 20:56 ` Eric W. Biederman
  2009-01-20 20:57   ` [PATCH 02/10] tun: Fix races in tun_set_iff Eric W. Biederman
  2009-01-22  0:03 ` [PATCH 0/10] Tun fixes and netns migration David Miller
  1 sibling, 1 reply; 11+ messages in thread
From: Eric W. Biederman @ 2009-01-20 20:56 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Max Krasnyansky, Pavel Emelyanov


Currently the tun driver keeps a private list of tun devices for what
appears to be a small gain in performance when reconnecting a file
descriptor to an existing tun or tap device.  So simplify the code by
removing it.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 drivers/net/tun.c |   74 +++++++++++++---------------------------------------
 1 files changed, 19 insertions(+), 55 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d7b81e4..17923a5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -88,7 +88,6 @@ struct tap_filter {
 };
 
 struct tun_struct {
-	struct list_head        list;
 	unsigned int 		flags;
 	int			attached;
 	uid_t			owner;
@@ -213,11 +212,6 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
 
 /* Network device part of the driver */
 
-static int tun_net_id;
-struct tun_net {
-	struct list_head dev_list;
-};
-
 static const struct ethtool_ops tun_ethtool_ops;
 
 /* Net device open. */
@@ -697,30 +691,22 @@ static void tun_setup(struct net_device *dev)
 	dev->features |= NETIF_F_NETNS_LOCAL;
 }
 
-static struct tun_struct *tun_get_by_name(struct tun_net *tn, const char *name)
-{
-	struct tun_struct *tun;
-
-	ASSERT_RTNL();
-	list_for_each_entry(tun, &tn->dev_list, list) {
-		if (!strncmp(tun->dev->name, name, IFNAMSIZ))
-		    return tun;
-	}
-
-	return NULL;
-}
-
 static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 {
-	struct tun_net *tn;
 	struct tun_struct *tun;
 	struct net_device *dev;
 	const struct cred *cred = current_cred();
 	int err;
 
-	tn = net_generic(net, tun_net_id);
-	tun = tun_get_by_name(tn, ifr->ifr_name);
-	if (tun) {
+	dev = __dev_get_by_name(net, ifr->ifr_name);
+	if (dev) {
+		if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
+			tun = netdev_priv(dev);
+		else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops)
+			tun = netdev_priv(dev);
+		else
+			return -EINVAL;
+
 		if (tun->attached)
 			return -EBUSY;
 
@@ -733,8 +719,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 			return -EPERM;
 		}
 	}
-	else if (__dev_get_by_name(net, ifr->ifr_name))
-		return -EINVAL;
 	else {
 		char *name;
 		unsigned long flags = 0;
@@ -782,8 +766,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		err = register_netdevice(tun->dev);
 		if (err < 0)
 			goto err_free_dev;
-
-		list_add(&tun->list, &tn->dev_list);
 	}
 
 	DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name);
@@ -1095,10 +1077,8 @@ static int tun_chr_close(struct inode *inode, struct file *file)
 	/* Drop read queue */
 	skb_queue_purge(&tun->readq);
 
-	if (!(tun->flags & TUN_PERSIST)) {
-		list_del(&tun->list);
+	if (!(tun->flags & TUN_PERSIST))
 		unregister_netdevice(tun->dev);
-	}
 
 	rtnl_unlock();
 
@@ -1212,37 +1192,21 @@ static const struct ethtool_ops tun_ethtool_ops = {
 
 static int tun_init_net(struct net *net)
 {
-	struct tun_net *tn;
-
-	tn = kmalloc(sizeof(*tn), GFP_KERNEL);
-	if (tn == NULL)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&tn->dev_list);
-
-	if (net_assign_generic(net, tun_net_id, tn)) {
-		kfree(tn);
-		return -ENOMEM;
-	}
-
 	return 0;
 }
 
 static void tun_exit_net(struct net *net)
 {
-	struct tun_net *tn;
-	struct tun_struct *tun, *nxt;
-
-	tn = net_generic(net, tun_net_id);
+	struct net_device *dev, *next;
 
 	rtnl_lock();
-	list_for_each_entry_safe(tun, nxt, &tn->dev_list, list) {
-		DBG(KERN_INFO "%s cleaned up\n", tun->dev->name);
-		unregister_netdevice(tun->dev);
+	for_each_netdev_safe(net, dev, next) {
+		if (dev->ethtool_ops != &tun_ethtool_ops)
+			continue;
+		DBG(KERN_INFO "%s cleaned up\n", dev->name);
+		unregister_netdevice(dev);
 	}
 	rtnl_unlock();
-
-	kfree(tn);
 }
 
 static struct pernet_operations tun_net_ops = {
@@ -1257,7 +1221,7 @@ static int __init tun_init(void)
 	printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
 	printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT);
 
-	ret = register_pernet_gen_device(&tun_net_id, &tun_net_ops);
+	ret = register_pernet_device(&tun_net_ops);
 	if (ret) {
 		printk(KERN_ERR "tun: Can't register pernet ops\n");
 		goto err_pernet;
@@ -1271,7 +1235,7 @@ static int __init tun_init(void)
 	return 0;
 
 err_misc:
-	unregister_pernet_gen_device(tun_net_id, &tun_net_ops);
+	unregister_pernet_device(&tun_net_ops);
 err_pernet:
 	return ret;
 }
@@ -1279,7 +1243,7 @@ err_pernet:
 static void tun_cleanup(void)
 {
 	misc_deregister(&tun_miscdev);
-	unregister_pernet_gen_device(tun_net_id, &tun_net_ops);
+	unregister_pernet_device(&tun_net_ops);
 }
 
 module_init(tun_init);
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 02/10] tun: Fix races in tun_set_iff
  2009-01-20 20:56 ` [PATCH 01/10] tun: Remove unnecessary tun_get_by_name Eric W. Biederman
@ 2009-01-20 20:57   ` Eric W. Biederman
  2009-01-20 20:59     ` [PATCH 03/10] tun: Use POLLERR not EBADF in tun_chr_poll Eric W. Biederman
  0 siblings, 1 reply; 11+ messages in thread
From: Eric W. Biederman @ 2009-01-20 20:57 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Max Krasnyansky, Pavel Emelyanov, netdev


It is possible for two different tasks with access to the same file
descriptor to call tun_set_iff on it at the same time and race to
attach to a tap device.  Prevent this by placing all of the logic to
attach to a file descriptor in one function and testing the file
descriptor to be certain it is not already attached to another tun
device.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 drivers/net/tun.c |   48 ++++++++++++++++++++++++++++++++----------------
 1 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 17923a5..20ef14d 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -106,6 +106,31 @@ struct tun_struct {
 #endif
 };
 
+static int tun_attach(struct tun_struct *tun, struct file *file)
+{
+	const struct cred *cred = current_cred();
+
+	ASSERT_RTNL();
+
+	if (file->private_data)
+		return -EINVAL;
+
+	if (tun->attached)
+		return -EBUSY;
+
+	/* Check permissions */
+	if (((tun->owner != -1 && cred->euid != tun->owner) ||
+	     (tun->group != -1 && cred->egid != tun->group)) &&
+		!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	file->private_data = tun;
+	tun->attached = 1;
+	get_net(dev_net(tun->dev));
+
+	return 0;
+}
+
 /* TAP filterting */
 static void addr_hash_set(u32 *mask, const u8 *addr)
 {
@@ -695,7 +720,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 {
 	struct tun_struct *tun;
 	struct net_device *dev;
-	const struct cred *cred = current_cred();
 	int err;
 
 	dev = __dev_get_by_name(net, ifr->ifr_name);
@@ -707,17 +731,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		else
 			return -EINVAL;
 
-		if (tun->attached)
-			return -EBUSY;
-
-		/* Check permissions */
-		if (((tun->owner != -1 &&
-		      cred->euid != tun->owner) ||
-		     (tun->group != -1 &&
-		      cred->egid != tun->group)) &&
-		    !capable(CAP_NET_ADMIN)) {
-			return -EPERM;
-		}
+		err = tun_attach(tun, file);
+		if (err < 0)
+			return err;
 	}
 	else {
 		char *name;
@@ -766,6 +782,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		err = register_netdevice(tun->dev);
 		if (err < 0)
 			goto err_free_dev;
+
+		err = tun_attach(tun, file);
+		if (err < 0)
+			goto err_free_dev;
 	}
 
 	DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name);
@@ -785,10 +805,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 	else
 		tun->flags &= ~TUN_VNET_HDR;
 
-	file->private_data = tun;
-	tun->attached = 1;
-	get_net(dev_net(tun->dev));
-
 	/* Make sure persistent devices do not get stuck in
 	 * xoff state.
 	 */
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 03/10] tun: Use POLLERR not EBADF in tun_chr_poll
  2009-01-20 20:57   ` [PATCH 02/10] tun: Fix races in tun_set_iff Eric W. Biederman
@ 2009-01-20 20:59     ` Eric W. Biederman
  2009-01-20 21:00       ` [PATCH 04/10] tun: Introduce tun_file Eric W. Biederman
  0 siblings, 1 reply; 11+ messages in thread
From: Eric W. Biederman @ 2009-01-20 20:59 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Max Krasnyansky, Pavel Emelyanov


EBADF is meaningless in the context of a poll mask so use POLLERR
instead.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 drivers/net/tun.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 20ef14d..8743de9 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -382,7 +382,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 	unsigned int mask = POLLOUT | POLLWRNORM;
 
 	if (!tun)
-		return -EBADFD;
+		return POLLERR;
 
 	DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
 
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 04/10] tun: Introduce tun_file
  2009-01-20 20:59     ` [PATCH 03/10] tun: Use POLLERR not EBADF in tun_chr_poll Eric W. Biederman
@ 2009-01-20 21:00       ` Eric W. Biederman
  2009-01-20 21:01         ` [PATCH 05/10] tun: Grab the netns in open Eric W. Biederman
  0 siblings, 1 reply; 11+ messages in thread
From: Eric W. Biederman @ 2009-01-20 21:00 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Max Krasnyansky, Pavel Emelyanov


Currently the tun code suffers from only having a single word of
data that exists for the entire life of the tun file descriptor.

This results in peculiar holding of references to the network namespace
as well as races between free_netdevice and tun_chr_close.

Fix this by introducing tun_file which will hold the per file state.
For the moment it still holds just a single word so the differences
are all logic changes with no changes in semantics.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 drivers/net/tun.c |  156 +++++++++++++++++++++++++++++++++++------------------
 1 files changed, 104 insertions(+), 52 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8743de9..d3a665d 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -87,9 +87,13 @@ struct tap_filter {
 	unsigned char	addr[FLT_EXACT_COUNT][ETH_ALEN];
 };
 
+struct tun_file {
+	struct tun_struct *tun;
+};
+
 struct tun_struct {
+	struct tun_file		*tfile;
 	unsigned int 		flags;
-	int			attached;
 	uid_t			owner;
 	gid_t			group;
 
@@ -108,14 +112,15 @@ struct tun_struct {
 
 static int tun_attach(struct tun_struct *tun, struct file *file)
 {
+	struct tun_file *tfile = file->private_data;
 	const struct cred *cred = current_cred();
 
 	ASSERT_RTNL();
 
-	if (file->private_data)
+	if (tfile->tun)
 		return -EINVAL;
 
-	if (tun->attached)
+	if (tun->tfile)
 		return -EBUSY;
 
 	/* Check permissions */
@@ -124,13 +129,41 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 		!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	file->private_data = tun;
-	tun->attached = 1;
+	tfile->tun = tun;
+	tun->tfile = tfile;
 	get_net(dev_net(tun->dev));
 
 	return 0;
 }
 
+static void __tun_detach(struct tun_struct *tun)
+{
+	struct tun_file *tfile = tun->tfile;
+
+	/* Detach from net device */
+	tfile->tun = NULL;
+	tun->tfile = NULL;
+	put_net(dev_net(tun->dev));
+
+	/* Drop read queue */
+	skb_queue_purge(&tun->readq);
+}
+
+static struct tun_struct *__tun_get(struct tun_file *tfile)
+{
+	return tfile->tun;
+}
+
+static struct tun_struct *tun_get(struct file *file)
+{
+	return __tun_get(file->private_data);
+}
+
+static void tun_put(struct tun_struct *tun)
+{
+	/* Noop for now */
+}
+
 /* TAP filterting */
 static void addr_hash_set(u32 *mask, const u8 *addr)
 {
@@ -261,7 +294,7 @@ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len);
 
 	/* Drop packet if interface is not attached */
-	if (!tun->attached)
+	if (!tun->tfile)
 		goto drop;
 
 	/* Drop if the filter does not like it.
@@ -378,7 +411,7 @@ static void tun_net_init(struct net_device *dev)
 /* Poll */
 static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 {
-	struct tun_struct *tun = file->private_data;
+	struct tun_struct *tun = tun_get(file);
 	unsigned int mask = POLLOUT | POLLWRNORM;
 
 	if (!tun)
@@ -391,6 +424,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 	if (!skb_queue_empty(&tun->readq))
 		mask |= POLLIN | POLLRDNORM;
 
+	tun_put(tun);
 	return mask;
 }
 
@@ -575,14 +609,18 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
 static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 			      unsigned long count, loff_t pos)
 {
-	struct tun_struct *tun = iocb->ki_filp->private_data;
+	struct tun_struct *tun = tun_get(iocb->ki_filp);
+	ssize_t result;
 
 	if (!tun)
 		return -EBADFD;
 
 	DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count);
 
-	return tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count));
+	result = tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count));
+
+	tun_put(tun);
+	return result;
 }
 
 /* Put packet to the user space buffer */
@@ -655,7 +693,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 			    unsigned long count, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
-	struct tun_struct *tun = file->private_data;
+	struct tun_struct *tun = tun_get(file);
 	DECLARE_WAITQUEUE(wait, current);
 	struct sk_buff *skb;
 	ssize_t len, ret = 0;
@@ -666,8 +704,10 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 	DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
 
 	len = iov_length(iv, count);
-	if (len < 0)
-		return -EINVAL;
+	if (len < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	add_wait_queue(&tun->read_wait, &wait);
 	while (len) {
@@ -698,6 +738,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&tun->read_wait, &wait);
 
+out:
+	tun_put(tun);
 	return ret;
 }
 
@@ -822,7 +864,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 
 static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr)
 {
-	struct tun_struct *tun = file->private_data;
+	struct tun_struct *tun = tun_get(file);
 
 	if (!tun)
 		return -EBADFD;
@@ -847,6 +889,7 @@ static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr)
 	if (tun->flags & TUN_VNET_HDR)
 		ifr->ifr_flags |= IFF_VNET_HDR;
 
+	tun_put(tun);
 	return 0;
 }
 
@@ -893,7 +936,7 @@ static int set_offload(struct net_device *dev, unsigned long arg)
 static int tun_chr_ioctl(struct inode *inode, struct file *file,
 			 unsigned int cmd, unsigned long arg)
 {
-	struct tun_struct *tun = file->private_data;
+	struct tun_struct *tun;
 	void __user* argp = (void __user*)arg;
 	struct ifreq ifr;
 	int ret;
@@ -902,6 +945,16 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 		if (copy_from_user(&ifr, argp, sizeof ifr))
 			return -EFAULT;
 
+	if (cmd == TUNGETFEATURES) {
+		/* Currently this just means: "what IFF flags are valid?".
+		 * This is needed because we never checked for invalid flags on
+		 * TUNSETIFF. */
+		return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
+				IFF_VNET_HDR,
+				(unsigned int __user*)argp);
+	}
+
+	tun = tun_get(file);
 	if (cmd == TUNSETIFF && !tun) {
 		int err;
 
@@ -919,28 +972,21 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 		return 0;
 	}
 
-	if (cmd == TUNGETFEATURES) {
-		/* Currently this just means: "what IFF flags are valid?".
-		 * This is needed because we never checked for invalid flags on
-		 * TUNSETIFF. */
-		return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-				IFF_VNET_HDR,
-				(unsigned int __user*)argp);
-	}
 
 	if (!tun)
 		return -EBADFD;
 
 	DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd);
 
+	ret = 0;
 	switch (cmd) {
 	case TUNGETIFF:
 		ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr);
 		if (ret)
-			return ret;
+			break;
 
 		if (copy_to_user(argp, &ifr, sizeof(ifr)))
-			return -EFAULT;
+			ret = -EFAULT;
 		break;
 
 	case TUNSETNOCSUM:
@@ -992,7 +1038,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 			ret = 0;
 		}
 		rtnl_unlock();
-		return ret;
+		break;
 
 #ifdef TUN_DEBUG
 	case TUNSETDEBUG:
@@ -1003,24 +1049,25 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 		rtnl_lock();
 		ret = set_offload(tun->dev, arg);
 		rtnl_unlock();
-		return ret;
+		break;
 
 	case TUNSETTXFILTER:
 		/* Can be set only for TAPs */
+		ret = -EINVAL;
 		if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV)
-			return -EINVAL;
+			break;
 		rtnl_lock();
 		ret = update_filter(&tun->txflt, (void __user *)arg);
 		rtnl_unlock();
-		return ret;
+		break;
 
 	case SIOCGIFHWADDR:
 		/* Get hw addres */
 		memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
 		ifr.ifr_hwaddr.sa_family = tun->dev->type;
 		if (copy_to_user(argp, &ifr, sizeof ifr))
-			return -EFAULT;
-		return 0;
+			ret = -EFAULT;
+		break;
 
 	case SIOCSIFHWADDR:
 		/* Set hw address */
@@ -1030,18 +1077,19 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 		rtnl_lock();
 		ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr);
 		rtnl_unlock();
-		return ret;
-
+		break;
 	default:
-		return -EINVAL;
+		ret = -EINVAL;
+		break;
 	};
 
-	return 0;
+	tun_put(tun);
+	return ret;
 }
 
 static int tun_chr_fasync(int fd, struct file *file, int on)
 {
-	struct tun_struct *tun = file->private_data;
+	struct tun_struct *tun = tun_get(file);
 	int ret;
 
 	if (!tun)
@@ -1063,40 +1111,44 @@ static int tun_chr_fasync(int fd, struct file *file, int on)
 	ret = 0;
 out:
 	unlock_kernel();
+	tun_put(tun);
 	return ret;
 }
 
 static int tun_chr_open(struct inode *inode, struct file * file)
 {
+	struct tun_file *tfile;
 	cycle_kernel_lock();
 	DBG1(KERN_INFO "tunX: tun_chr_open\n");
-	file->private_data = NULL;
+
+	tfile = kmalloc(sizeof(*tfile), GFP_KERNEL);
+	if (!tfile)
+		return -ENOMEM;
+	tfile->tun = NULL;
+	file->private_data = tfile;
 	return 0;
 }
 
 static int tun_chr_close(struct inode *inode, struct file *file)
 {
-	struct tun_struct *tun = file->private_data;
-
-	if (!tun)
-		return 0;
+	struct tun_file *tfile = file->private_data;
+	struct tun_struct *tun = __tun_get(tfile);
 
-	DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
 
-	rtnl_lock();
+	if (tun) {
+		DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name);
 
-	/* Detach from net device */
-	file->private_data = NULL;
-	tun->attached = 0;
-	put_net(dev_net(tun->dev));
+		rtnl_lock();
+		__tun_detach(tun);
 
-	/* Drop read queue */
-	skb_queue_purge(&tun->readq);
+		/* If desireable, unregister the netdevice. */
+		if (!(tun->flags & TUN_PERSIST))
+			unregister_netdevice(tun->dev);
 
-	if (!(tun->flags & TUN_PERSIST))
-		unregister_netdevice(tun->dev);
+		rtnl_unlock();
+	}
 
-	rtnl_unlock();
+	kfree(tfile);
 
 	return 0;
 }
@@ -1177,7 +1229,7 @@ static void tun_set_msglevel(struct net_device *dev, u32 value)
 static u32 tun_get_link(struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
-	return tun->attached;
+	return !!tun->tfile;
 }
 
 static u32 tun_get_rx_csum(struct net_device *dev)
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 05/10] tun: Grab the netns in open.
  2009-01-20 21:00       ` [PATCH 04/10] tun: Introduce tun_file Eric W. Biederman
@ 2009-01-20 21:01         ` Eric W. Biederman
  2009-01-20 21:02           ` [PATCH 06/10] tun: Make tun_net_xmit atomic wrt tun_attach && tun_detach Eric W. Biederman
  0 siblings, 1 reply; 11+ messages in thread
From: Eric W. Biederman @ 2009-01-20 21:01 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Max Krasnyansky, Pavel Emelyanov


Grabbing namespaces in open, and putting them in close always seems to
be the cleanest approach with the fewest surprises.

So now that we have tun_file so we have somepleace to put the network
namespace, let's grab the network namespace on file open and put on
file close.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 drivers/net/tun.c |   10 ++++++----
 1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d3a665d..dfbf586 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -89,6 +89,7 @@ struct tap_filter {
 
 struct tun_file {
 	struct tun_struct *tun;
+	struct net *net;
 };
 
 struct tun_struct {
@@ -131,7 +132,6 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 
 	tfile->tun = tun;
 	tun->tfile = tfile;
-	get_net(dev_net(tun->dev));
 
 	return 0;
 }
@@ -143,7 +143,6 @@ static void __tun_detach(struct tun_struct *tun)
 	/* Detach from net device */
 	tfile->tun = NULL;
 	tun->tfile = NULL;
-	put_net(dev_net(tun->dev));
 
 	/* Drop read queue */
 	skb_queue_purge(&tun->readq);
@@ -936,6 +935,7 @@ static int set_offload(struct net_device *dev, unsigned long arg)
 static int tun_chr_ioctl(struct inode *inode, struct file *file,
 			 unsigned int cmd, unsigned long arg)
 {
+	struct tun_file *tfile = file->private_data;
 	struct tun_struct *tun;
 	void __user* argp = (void __user*)arg;
 	struct ifreq ifr;
@@ -954,14 +954,14 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 				(unsigned int __user*)argp);
 	}
 
-	tun = tun_get(file);
+	tun = __tun_get(tfile);
 	if (cmd == TUNSETIFF && !tun) {
 		int err;
 
 		ifr.ifr_name[IFNAMSIZ-1] = '\0';
 
 		rtnl_lock();
-		err = tun_set_iff(current->nsproxy->net_ns, file, &ifr);
+		err = tun_set_iff(tfile->net, file, &ifr);
 		rtnl_unlock();
 
 		if (err)
@@ -1125,6 +1125,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 	if (!tfile)
 		return -ENOMEM;
 	tfile->tun = NULL;
+	tfile->net = get_net(current->nsproxy->net_ns);
 	file->private_data = tfile;
 	return 0;
 }
@@ -1148,6 +1149,7 @@ static int tun_chr_close(struct inode *inode, struct file *file)
 		rtnl_unlock();
 	}
 
+	put_net(tfile->net);
 	kfree(tfile);
 
 	return 0;
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 06/10] tun: Make tun_net_xmit atomic wrt tun_attach && tun_detach
  2009-01-20 21:01         ` [PATCH 05/10] tun: Grab the netns in open Eric W. Biederman
@ 2009-01-20 21:02           ` Eric W. Biederman
  2009-01-20 21:03             ` [PATCH 07/10] tun: Move read_wait into tun_file Eric W. Biederman
  0 siblings, 1 reply; 11+ messages in thread
From: Eric W. Biederman @ 2009-01-20 21:02 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Max Krasnyansky, Pavel Emelyanov


Currently this small race allows for a packet to be received when we
detach from an tun device and still be enqueued.  Not especially
important but not what the code is trying to do.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 drivers/net/tun.c |   24 +++++++++++++++++-------
 1 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index dfbf586..fa93160 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -115,25 +115,33 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 {
 	struct tun_file *tfile = file->private_data;
 	const struct cred *cred = current_cred();
+	int err;
 
 	ASSERT_RTNL();
 
-	if (tfile->tun)
-		return -EINVAL;
-
-	if (tun->tfile)
-		return -EBUSY;
-
 	/* Check permissions */
 	if (((tun->owner != -1 && cred->euid != tun->owner) ||
 	     (tun->group != -1 && cred->egid != tun->group)) &&
 		!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
+	netif_tx_lock_bh(tun->dev);
+
+	err = -EINVAL;
+	if (tfile->tun)
+		goto out;
+
+	err = -EBUSY;
+	if (tun->tfile)
+		goto out;
+
+	err = 0;
 	tfile->tun = tun;
 	tun->tfile = tfile;
 
-	return 0;
+out:
+	netif_tx_unlock_bh(tun->dev);
+	return err;
 }
 
 static void __tun_detach(struct tun_struct *tun)
@@ -141,8 +149,10 @@ static void __tun_detach(struct tun_struct *tun)
 	struct tun_file *tfile = tun->tfile;
 
 	/* Detach from net device */
+	netif_tx_lock_bh(tun->dev);
 	tfile->tun = NULL;
 	tun->tfile = NULL;
+	netif_tx_unlock_bh(tun->dev);
 
 	/* Drop read queue */
 	skb_queue_purge(&tun->readq);
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 07/10] tun: Move read_wait into tun_file
  2009-01-20 21:02           ` [PATCH 06/10] tun: Make tun_net_xmit atomic wrt tun_attach && tun_detach Eric W. Biederman
@ 2009-01-20 21:03             ` Eric W. Biederman
  2009-01-20 21:07               ` [PATCH 08/10] tun: Fix races between tun_net_close and free_netdev Eric W. Biederman
  0 siblings, 1 reply; 11+ messages in thread
From: Eric W. Biederman @ 2009-01-20 21:03 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Max Krasnyansky, Pavel Emelyanov


The poll interface requires that the waitqueue exist while the struct
file is open.  In the rare case when a tun device disappears before
the tun file closes we fail to provide this property, so move
read_wait.

This is safe now that tun_net_xmit is atomic with tun_detach.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 drivers/net/tun.c |   18 ++++++++++--------
 1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fa93160..030d985 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -90,6 +90,7 @@ struct tap_filter {
 struct tun_file {
 	struct tun_struct *tun;
 	struct net *net;
+	wait_queue_head_t	read_wait;
 };
 
 struct tun_struct {
@@ -98,7 +99,6 @@ struct tun_struct {
 	uid_t			owner;
 	gid_t			group;
 
-	wait_queue_head_t	read_wait;
 	struct sk_buff_head	readq;
 
 	struct net_device	*dev;
@@ -335,7 +335,7 @@ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Notify and wake up reader process */
 	if (tun->flags & TUN_FASYNC)
 		kill_fasync(&tun->fasync, SIGIO, POLL_IN);
-	wake_up_interruptible(&tun->read_wait);
+	wake_up_interruptible(&tun->tfile->read_wait);
 	return 0;
 
 drop:
@@ -420,7 +420,8 @@ static void tun_net_init(struct net_device *dev)
 /* Poll */
 static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 {
-	struct tun_struct *tun = tun_get(file);
+	struct tun_file *tfile = file->private_data;
+	struct tun_struct *tun = __tun_get(tfile);
 	unsigned int mask = POLLOUT | POLLWRNORM;
 
 	if (!tun)
@@ -428,7 +429,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 
 	DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
 
-	poll_wait(file, &tun->read_wait, wait);
+	poll_wait(file, &tfile->read_wait, wait);
 
 	if (!skb_queue_empty(&tun->readq))
 		mask |= POLLIN | POLLRDNORM;
@@ -702,7 +703,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 			    unsigned long count, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
-	struct tun_struct *tun = tun_get(file);
+	struct tun_file *tfile = file->private_data;
+	struct tun_struct *tun = __tun_get(tfile);
 	DECLARE_WAITQUEUE(wait, current);
 	struct sk_buff *skb;
 	ssize_t len, ret = 0;
@@ -718,7 +720,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 		goto out;
 	}
 
-	add_wait_queue(&tun->read_wait, &wait);
+	add_wait_queue(&tfile->read_wait, &wait);
 	while (len) {
 		current->state = TASK_INTERRUPTIBLE;
 
@@ -745,7 +747,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 	}
 
 	current->state = TASK_RUNNING;
-	remove_wait_queue(&tun->read_wait, &wait);
+	remove_wait_queue(&tfile->read_wait, &wait);
 
 out:
 	tun_put(tun);
@@ -757,7 +759,6 @@ static void tun_setup(struct net_device *dev)
 	struct tun_struct *tun = netdev_priv(dev);
 
 	skb_queue_head_init(&tun->readq);
-	init_waitqueue_head(&tun->read_wait);
 
 	tun->owner = -1;
 	tun->group = -1;
@@ -1136,6 +1137,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 		return -ENOMEM;
 	tfile->tun = NULL;
 	tfile->net = get_net(current->nsproxy->net_ns);
+	init_waitqueue_head(&tfile->read_wait);
 	file->private_data = tfile;
 	return 0;
 }
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 08/10] tun: Fix races between tun_net_close and free_netdev.
  2009-01-20 21:03             ` [PATCH 07/10] tun: Move read_wait into tun_file Eric W. Biederman
@ 2009-01-20 21:07               ` Eric W. Biederman
  2009-01-20 21:08                 ` [PATCH 09/10] tun: There is no longer any need to deny changing network namespaces Eric W. Biederman
  0 siblings, 1 reply; 11+ messages in thread
From: Eric W. Biederman @ 2009-01-20 21:07 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Max Krasnyansky, Pavel Emelyanov


The tun code does not cope gracefully if the network device goes away before
the tun file descriptor is closed.  It looks like we can trigger this with
rmmod, and moving tun devices between network namespaces will allow this
to be triggered when network namespaces exit.

To fix this I introduce an intermediate data structure tun_file which
holds a count of users and a pointer to the struct tun_struct.  tun_get
increments that reference count if it is greater than 0.  tun_put decrements
that reference count and detaches from the network device if the count is 0.

While we have a file attached to the network device I hold a reference
to the network device keeping it from going away completely.

When a network device is unregistered I decrement the count of the
attached tun_file and if that was the last user I detach the tun_file,
and all processes on read_wait are woken up to ensure they do not
sleep indefinitely. As some of those sleeps happen with the count on
the tun device elevated waking up the read waiters ensures that
tun_file will be detached in a timely manner.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 drivers/net/tun.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 030d985..51dba61 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -88,6 +88,7 @@ struct tap_filter {
 };
 
 struct tun_file {
+	atomic_t count;
 	struct tun_struct *tun;
 	struct net *net;
 	wait_queue_head_t	read_wait;
@@ -138,6 +139,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
 	err = 0;
 	tfile->tun = tun;
 	tun->tfile = tfile;
+	dev_hold(tun->dev);
+	atomic_inc(&tfile->count);
 
 out:
 	netif_tx_unlock_bh(tun->dev);
@@ -156,11 +159,26 @@ static void __tun_detach(struct tun_struct *tun)
 
 	/* Drop read queue */
 	skb_queue_purge(&tun->readq);
+
+	/* Drop the extra count on the net device */
+	dev_put(tun->dev);
+}
+
+static void tun_detach(struct tun_struct *tun)
+{
+	rtnl_lock();
+	__tun_detach(tun);
+	rtnl_unlock();
 }
 
 static struct tun_struct *__tun_get(struct tun_file *tfile)
 {
-	return tfile->tun;
+	struct tun_struct *tun = NULL;
+
+	if (atomic_inc_not_zero(&tfile->count))
+		tun = tfile->tun;
+
+	return tun;
 }
 
 static struct tun_struct *tun_get(struct file *file)
@@ -170,7 +188,10 @@ static struct tun_struct *tun_get(struct file *file)
 
 static void tun_put(struct tun_struct *tun)
 {
-	/* Noop for now */
+	struct tun_file *tfile = tun->tfile;
+
+	if (atomic_dec_and_test(&tfile->count))
+		tun_detach(tfile->tun);
 }
 
 /* TAP filterting */
@@ -281,6 +302,21 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
 
 static const struct ethtool_ops tun_ethtool_ops;
 
+/* Net device detach from fd. */
+static void tun_net_uninit(struct net_device *dev)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	struct tun_file *tfile = tun->tfile;
+
+	/* Inform the methods they need to stop using the dev.
+	 */
+	if (tfile) {
+		wake_up_all(&tfile->read_wait);
+		if (atomic_dec_and_test(&tfile->count))
+			__tun_detach(tun);
+	}
+}
+
 /* Net device open. */
 static int tun_net_open(struct net_device *dev)
 {
@@ -367,6 +403,7 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu)
 }
 
 static const struct net_device_ops tun_netdev_ops = {
+	.ndo_uninit		= tun_net_uninit,
 	.ndo_open		= tun_net_open,
 	.ndo_stop		= tun_net_close,
 	.ndo_start_xmit		= tun_net_xmit,
@@ -374,6 +411,7 @@ static const struct net_device_ops tun_netdev_ops = {
 };
 
 static const struct net_device_ops tap_netdev_ops = {
+	.ndo_uninit		= tun_net_uninit,
 	.ndo_open		= tun_net_open,
 	.ndo_stop		= tun_net_close,
 	.ndo_start_xmit		= tun_net_xmit,
@@ -434,6 +472,9 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 	if (!skb_queue_empty(&tun->readq))
 		mask |= POLLIN | POLLRDNORM;
 
+	if (tun->dev->reg_state != NETREG_REGISTERED)
+		mask = POLLERR;
+
 	tun_put(tun);
 	return mask;
 }
@@ -734,6 +775,10 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
 				ret = -ERESTARTSYS;
 				break;
 			}
+			if (tun->dev->reg_state != NETREG_REGISTERED) {
+				ret = -EIO;
+				break;
+			}
 
 			/* Nothing to read, let's sleep */
 			schedule();
@@ -1135,6 +1180,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
 	tfile = kmalloc(sizeof(*tfile), GFP_KERNEL);
 	if (!tfile)
 		return -ENOMEM;
+	atomic_set(&tfile->count, 0);
 	tfile->tun = NULL;
 	tfile->net = get_net(current->nsproxy->net_ns);
 	init_waitqueue_head(&tfile->read_wait);
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 09/10] tun: There is no longer any need to deny changing network namespaces
  2009-01-20 21:07               ` [PATCH 08/10] tun: Fix races between tun_net_close and free_netdev Eric W. Biederman
@ 2009-01-20 21:08                 ` Eric W. Biederman
  0 siblings, 0 replies; 11+ messages in thread
From: Eric W. Biederman @ 2009-01-20 21:08 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Max Krasnyansky, Pavel Emelyanov


With the awkward case between free_netdev and dev_chr_close fixed
there is no longer any need to limit tun and tap devices to the
network namespace they were created it.  So remove the
NETIF_F_NETNS_LOCAL flag on the network device.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
---
 drivers/net/tun.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 51dba61..97b0500 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -810,7 +810,6 @@ static void tun_setup(struct net_device *dev)
 
 	dev->ethtool_ops = &tun_ethtool_ops;
 	dev->destructor = free_netdev;
-	dev->features |= NETIF_F_NETNS_LOCAL;
 }
 
 static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
-- 
1.5.6.3

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/10] Tun fixes and netns migration
  2009-01-20 20:53 [PATCH 0/10] Tun fixes and netns migration Eric W. Biederman
  2009-01-20 20:56 ` [PATCH 01/10] tun: Remove unnecessary tun_get_by_name Eric W. Biederman
@ 2009-01-22  0:03 ` David Miller
  1 sibling, 0 replies; 11+ messages in thread
From: David Miller @ 2009-01-22  0:03 UTC (permalink / raw)
  To: ebiederm; +Cc: netdev, maxk, xemul

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Tue, 20 Jan 2009 12:53:53 -0800

> Having a tun device that can be migrated between network namespaces 
> is interesting when setting up creative network topologies using
> network namespaces.
> 
> To get there requires some general clean and restructuring of the
> tun driver.

All applied to net-next-2.6, thanks Eric.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2009-01-22  0:03 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-01-20 20:53 [PATCH 0/10] Tun fixes and netns migration Eric W. Biederman
2009-01-20 20:56 ` [PATCH 01/10] tun: Remove unnecessary tun_get_by_name Eric W. Biederman
2009-01-20 20:57   ` [PATCH 02/10] tun: Fix races in tun_set_iff Eric W. Biederman
2009-01-20 20:59     ` [PATCH 03/10] tun: Use POLLERR not EBADF in tun_chr_poll Eric W. Biederman
2009-01-20 21:00       ` [PATCH 04/10] tun: Introduce tun_file Eric W. Biederman
2009-01-20 21:01         ` [PATCH 05/10] tun: Grab the netns in open Eric W. Biederman
2009-01-20 21:02           ` [PATCH 06/10] tun: Make tun_net_xmit atomic wrt tun_attach && tun_detach Eric W. Biederman
2009-01-20 21:03             ` [PATCH 07/10] tun: Move read_wait into tun_file Eric W. Biederman
2009-01-20 21:07               ` [PATCH 08/10] tun: Fix races between tun_net_close and free_netdev Eric W. Biederman
2009-01-20 21:08                 ` [PATCH 09/10] tun: There is no longer any need to deny changing network namespaces Eric W. Biederman
2009-01-22  0:03 ` [PATCH 0/10] Tun fixes and netns migration David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).