From: Dmitry Mishin <dim@openvz.org>
To: containers@lists.osdl.org
Cc: alexey@sw.ru, saw@sw.ru, Andrew Morton <akpm@osdl.org>,
netdev@vger.kernel.org
Subject: [PATCH 9/12] L2 network namespace (v3): device to pass packets between namespaces
Date: Wed, 17 Jan 2007 19:14:00 +0300 [thread overview]
Message-ID: <200701171914.00723.dim@openvz.org> (raw)
In-Reply-To: <200701171851.14734.dim@openvz.org>
A simple device to pass packets between a namespace and its child.
Signed-off-by: Dmitry Mishin <dim@openvz.org>
---
drivers/net/Makefile | 3
drivers/net/veth.c | 321 +++++++++++++++++++++++++++++++++++++++++++++++
net/core/net_namespace.c | 1
3 files changed, 325 insertions(+)
--- linux-2.6.20-rc4-mm1.net_ns.orig/drivers/net/Makefile
+++ linux-2.6.20-rc4-mm1.net_ns/drivers/net/Makefile
@@ -125,6 +125,9 @@ obj-$(CONFIG_SLIP) += slip.o
obj-$(CONFIG_SLHC) += slhc.o
obj-$(CONFIG_DUMMY) += dummy.o
+ifeq ($(CONFIG_NET_NS),y)
+obj-m += veth.o
+endif
obj-$(CONFIG_IFB) += ifb.o
obj-$(CONFIG_DE600) += de600.o
obj-$(CONFIG_DE620) += de620.o
--- /dev/null
+++ linux-2.6.20-rc4-mm1.net_ns/drivers/net/veth.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2006 SWsoft
+ *
+ * Written by Andrey Savochkin <saw@sw.ru>,
+ * reusing code by Andrey Mirkin <amirkin@sw.ru>.
+ */
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/ctype.h>
+#include <asm/semaphore.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <net/dst.h>
+#include <net/xfrm.h>
+
+struct veth_struct
+{
+ struct net_device *pair;
+ struct net_device_stats stats;
+};
+
+#define veth_from_netdev(dev) ((struct veth_struct *)(netdev_priv(dev)))
+
+/* ------------------------------------------------------------------- *
+ *
+ * Device functions
+ *
+ * ------------------------------------------------------------------- */
+
+static struct net_device_stats *get_stats(struct net_device *dev);
+static int veth_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct net_device_stats *stats;
+ struct veth_struct *entry;
+ struct net_device *rcv;
+ struct net_namespace *orig_net_ns;
+ int length;
+
+ stats = get_stats(dev);
+ entry = veth_from_netdev(dev);
+ rcv = entry->pair;
+
+ if (!(rcv->flags & IFF_UP))
+ /* Target namespace does not want to receive packets */
+ goto outf;
+
+ dst_release(skb->dst);
+ skb->dst = NULL;
+ secpath_reset(skb);
+ skb_orphan(skb);
+ nf_reset(skb);
+
+ orig_net_ns = push_net_ns(rcv->net_ns);
+ skb->dev = rcv;
+ skb->pkt_type = PACKET_HOST;
+ skb->protocol = eth_type_trans(skb, rcv);
+
+ length = skb->len;
+ stats->tx_bytes += length;
+ stats->tx_packets++;
+ stats = get_stats(rcv);
+ stats->rx_bytes += length;
+ stats->rx_packets++;
+
+ netif_rx(skb);
+ pop_net_ns(orig_net_ns);
+ return 0;
+
+outf:
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return 0;
+}
+
+static int veth_open(struct net_device *dev)
+{
+ return 0;
+}
+
+static int veth_close(struct net_device *dev)
+{
+ return 0;
+}
+
+static void veth_destructor(struct net_device *dev)
+{
+ free_netdev(dev);
+}
+
+static struct net_device_stats *get_stats(struct net_device *dev)
+{
+ return &veth_from_netdev(dev)->stats;
+}
+
+int veth_init_dev(struct net_device *dev)
+{
+ dev->hard_start_xmit = veth_xmit;
+ dev->open = veth_open;
+ dev->stop = veth_close;
+ dev->destructor = veth_destructor;
+ dev->get_stats = get_stats;
+
+ ether_setup(dev);
+
+ dev->tx_queue_len = 0;
+ return 0;
+}
+
+static void veth_setup(struct net_device *dev)
+{
+ dev->init = veth_init_dev;
+}
+
+static inline int is_veth_dev(struct net_device *dev)
+{
+ return dev->init == veth_init_dev;
+}
+
+/* ------------------------------------------------------------------- *
+ *
+ * Management interface
+ *
+ * ------------------------------------------------------------------- */
+
+struct net_device *veth_dev_alloc(char *name, char *addr)
+{
+ struct net_device *dev;
+
+ dev = alloc_netdev(sizeof(struct veth_struct), name, veth_setup);
+ if (dev != NULL) {
+ memcpy(dev->dev_addr, addr, ETH_ALEN);
+ dev->addr_len = ETH_ALEN;
+ }
+ return dev;
+}
+
+int veth_entry_add(char *parent_name, char *parent_addr,
+ struct net_namespace *parent_ns, char *child_name, char *child_addr,
+ struct net_namespace *child_ns)
+{
+ struct net_device *parent_dev, *child_dev;
+ int err;
+
+ err = -ENOMEM;
+ if ((parent_dev = veth_dev_alloc(parent_name, parent_addr)) == NULL)
+ goto out_alocp;
+ if ((child_dev = veth_dev_alloc(child_name, child_addr)) == NULL)
+ goto out_alocc;
+ veth_from_netdev(parent_dev)->pair = child_dev;
+ veth_from_netdev(child_dev)->pair = parent_dev;
+
+ /*
+ * About serialization, see comments to veth_pair_del().
+ */
+ rtnl_lock();
+ /* refcounts should be already upped, so, just put old ones */
+ put_net_ns(parent_dev->net_ns);
+ parent_dev->net_ns = parent_ns;
+ if ((err = register_netdevice(parent_dev)))
+ goto out_regp;
+
+ put_net_ns(child_dev->net_ns);
+ child_dev->net_ns = child_ns;
+ if ((err = register_netdevice(child_dev)))
+ goto out_regc;
+ rtnl_unlock();
+ return 0;
+
+out_regc:
+ unregister_netdevice(parent_dev);
+ rtnl_unlock();
+ free_netdev(child_dev);
+ return err;
+
+out_regp:
+ rtnl_unlock();
+ free_netdev(child_dev);
+out_alocc:
+ free_netdev(parent_dev);
+out_alocp:
+ return err;
+}
+
+static void veth_pair_del(struct net_device *parent_dev)
+{
+ struct net_device *child_dev;
+ struct net_namespace *parent_ns, *child_ns;
+
+ child_dev = veth_from_netdev(parent_dev)->pair;
+ get_net_ns(child_dev->net_ns);
+ child_ns = child_dev->net_ns;
+
+ dev_close(child_dev);
+ synchronize_net();
+ /*
+ * Now child_dev does not send or receives anything.
+ * This means child_dev->hard_start_xmit is not called anymore.
+ */
+ unregister_netdevice(parent_dev);
+ /*
+ * At this point child_dev has dead pointer to parent_dev.
+ * But this pointer is not dereferenced.
+ */
+ parent_ns = push_net_ns(child_ns);
+ unregister_netdevice(child_dev);
+ pop_net_ns(parent_ns);
+
+ put_net_ns(child_ns);
+}
+
+int veth_entry_del(char *parent_name)
+{
+ struct net_device *dev;
+
+ if ((dev = dev_get_by_name(parent_name)) == NULL)
+ return -ENODEV;
+
+ rtnl_lock();
+ veth_pair_del(dev);
+ dev_put(dev);
+ rtnl_unlock();
+
+ return 0;
+}
+
+void veth_entry_del_all(void)
+{
+ struct net_device **p, *dev;
+
+ rtnl_lock();
+ for (p = &dev_base; (dev = *p) != NULL; ) {
+ if (!is_veth_dev(dev)) {
+ p = &dev->next;
+ continue;
+ }
+
+ dev_hold(dev);
+ veth_pair_del(dev);
+ dev_put(dev);
+ }
+ rtnl_unlock();
+}
+
+/* ------------------------------------------------------------------- *
+ *
+ * Information in proc
+ *
+ * ------------------------------------------------------------------- */
+
+#ifdef CONFIG_PROC_FS
+
+#define ADDR_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
+#define ADDR(x) (x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5]
+#define ADDR_HDR "%-17s"
+
+static int veth_proc_show(struct seq_file *m,
+ struct net_device *dev, void *data)
+{
+ struct net_device *pair;
+
+ if (dev == SEQ_START_TOKEN) {
+ seq_puts(m, "Version: 1.0\n");
+ seq_printf(m, "%-*s " ADDR_HDR " %-*s " ADDR_HDR "\n",
+ IFNAMSIZ, "Name", "Address",
+ IFNAMSIZ, "PeerName", "PeerAddress");
+ return 0;
+ }
+
+ if (!is_veth_dev(dev))
+ return 0;
+
+ pair = veth_from_netdev(dev)->pair;
+ seq_printf(m, "%-*s " ADDR_FMT " %-*s " ADDR_FMT "\n",
+ IFNAMSIZ, dev->name, ADDR(dev->dev_addr),
+ IFNAMSIZ, pair->name, ADDR(pair->dev_addr));
+ return 0;
+}
+
+static int veth_proc_create(void)
+{
+ return netdev_proc_create("veth_list", &veth_proc_show, NULL,
+ THIS_MODULE);
+}
+
+static void veth_proc_remove(void)
+{
+ netdev_proc_remove("net/veth_list");
+}
+
+#else
+
+static inline int veth_proc_create(void) { return 0; }
+static inline void veth_proc_remove(void) { }
+
+#endif
+
+/* ------------------------------------------------------------------- *
+ *
+ * Module initialization
+ *
+ * ------------------------------------------------------------------- */
+
+int __init veth_init(void)
+{
+ veth_proc_create();
+ return 0;
+}
+
+void __exit veth_exit(void)
+{
+ veth_proc_remove();
+ veth_entry_del_all();
+}
+
+module_init(veth_init)
+module_exit(veth_exit)
+
+MODULE_DESCRIPTION("Virtual Ethernet Device");
+MODULE_LICENSE("GPL v2");
--- linux-2.6.20-rc4-mm1.net_ns.orig/net/core/net_namespace.c
+++ linux-2.6.20-rc4-mm1.net_ns/net/core/net_namespace.c
@@ -111,5 +111,6 @@ void free_net_ns(struct kref *kref)
ip_fib_struct_cleanup(ns);
kfree(ns);
}
+EXPORT_SYMBOL_GPL(free_net_ns);
#endif /* CONFIG_NET_NS */
next prev parent reply other threads:[~2007-01-17 16:16 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-01-17 15:51 [PATCH 0/12] L2 network namespace (v3) Dmitry Mishin
2007-01-17 15:57 ` [PATCH 1/12] L2 network namespace (v3): current network namespace operations Dmitry Mishin
2007-01-17 20:16 ` Eric W. Biederman
2007-01-18 10:56 ` Dmitry Mishin
2007-01-18 13:37 ` Eric W. Biederman
2007-01-25 7:58 ` Eric W. Biederman
2007-01-17 15:58 ` [PATCH 0/12] L2 network namespace (v3) Cedric Le Goater
2007-01-17 15:59 ` [PATCH 2/12] L2 network namespace (v3): network devices virtualization Dmitry Mishin
2007-01-17 16:00 ` [PATCH 3/12] L2 network namespace (v3): loopback device virtualization Dmitry Mishin
2007-01-17 16:01 ` [PATCH 4/12] L2 network namespace (v3): devinet sysctl's checks Dmitry Mishin
2007-01-17 16:03 ` [PATCH 5/12] L2 network namespace (v3): IPv4 routing Dmitry Mishin
2007-01-17 16:05 ` [PATCH 6/12] L2 network namespace (v3): socket hashes Dmitry Mishin
2007-01-17 16:10 ` [PATCH 7/12] allow proc_dir_entries to have destructor Dmitry Mishin
2007-01-17 16:10 ` [PATCH 0/12] L2 network namespace (v3) Daniel Lezcano
2007-01-17 16:11 ` [PATCH 8/12] net_device seq_file Dmitry Mishin
2007-01-17 20:36 ` Stephen Hemminger
2007-01-18 17:07 ` Eric W. Biederman
2007-01-17 16:14 ` Dmitry Mishin [this message]
2007-01-17 16:15 ` [PATCH 10/12] L2 network namespace (v3): playing with pass-through device Dmitry Mishin
2007-01-17 16:16 ` [PATCH 11/12] L2 network namespace (v3): sockets proc view virtualization Dmitry Mishin
2007-01-17 16:18 ` [PATCH 12/12] L2 network namespace (v3): L3 network namespace intro Dmitry Mishin
2007-01-19 0:07 ` [PATCH 0/12] L2 network namespace (v3) YOSHIFUJI Hideaki / 吉藤英明
2007-01-19 7:27 ` Eric W. Biederman
2007-01-19 9:35 ` Dmitry Mishin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200701171914.00723.dim@openvz.org \
--to=dim@openvz.org \
--cc=akpm@osdl.org \
--cc=alexey@sw.ru \
--cc=containers@lists.osdl.org \
--cc=netdev@vger.kernel.org \
--cc=saw@sw.ru \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).