Netdev List

Netdev List
 help / color / mirror / Atom feed

* [net-next PATCH 01/02] net/ipv4: VTI support rx-path hook in xfrm4_mode_tunnel.
From: Saurabh @ 2012-06-08 17:32 UTC (permalink / raw)
  To: netdev



Add hook for rx-path xfmr4_mode_tunnel for VTI tunnel module.

Signed-off-by: Saurabh Mohan <saurabh.mohan@vyatta.com>
Reviewed-by: Stephen Hemminger <shemminger@vyatta.com>

---
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e0a55df..04214c0 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1475,6 +1475,8 @@ extern int xfrm4_output(struct sk_buff *skb);
 extern int xfrm4_output_finish(struct sk_buff *skb);
 extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
 extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
+extern int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler);
+extern int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler);
 extern int xfrm6_extract_header(struct sk_buff *skb);
 extern int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
 extern int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index ed4bf11..4fc2944 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -15,6 +15,68 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
+/*
+ * Informational hook. The decap is still done here.
+ */
+static struct xfrm_tunnel __rcu *rcv_notify_handlers __read_mostly;
+static DEFINE_MUTEX(xfrm4_mode_tunnel_input_mutex);
+
+int xfrm4_mode_tunnel_input_register(struct xfrm_tunnel *handler)
+{
+	struct xfrm_tunnel __rcu **pprev;
+	struct xfrm_tunnel *t;
+
+	int ret = -EEXIST;
+	int priority = handler->priority;
+
+	mutex_lock(&xfrm4_mode_tunnel_input_mutex);
+
+	for (pprev = &rcv_notify_handlers;
+		(t = rcu_dereference_protected(*pprev,
+		lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL;
+		pprev = &t->next) {
+		if (t->priority > priority)
+			break;
+		if (t->priority == priority)
+			goto err;
+
+	}
+
+	handler->next = *pprev;
+	rcu_assign_pointer(*pprev, handler);
+
+	ret = 0;
+
+err:
+	mutex_unlock(&xfrm4_mode_tunnel_input_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_register);
+
+int xfrm4_mode_tunnel_input_deregister(struct xfrm_tunnel *handler)
+{
+	struct xfrm_tunnel __rcu **pprev;
+	struct xfrm_tunnel *t;
+	int ret = -ENOENT;
+
+	mutex_lock(&xfrm4_mode_tunnel_input_mutex);
+	for (pprev = &rcv_notify_handlers;
+		(t = rcu_dereference_protected(*pprev,
+		lockdep_is_held(&xfrm4_mode_tunnel_input_mutex))) != NULL;
+		pprev = &t->next) {
+		if (t == handler) {
+			*pprev = handler->next;
+			ret = 0;
+			break;
+		}
+	}
+	mutex_unlock(&xfrm4_mode_tunnel_input_mutex);
+	synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xfrm4_mode_tunnel_input_deregister);
+
 static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
 {
 	struct iphdr *inner_iph = ipip_hdr(skb);
@@ -64,8 +126,14 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
+#define for_each_input_rcu(head, handler)	\
+	for (handler = rcu_dereference(head);	\
+		handler != NULL;		\
+		handler = rcu_dereference(handler->next))  \
+
 static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 {
+	struct xfrm_tunnel *handler;
 	int err = -EINVAL;
 
 	if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPIP)
@@ -74,6 +142,10 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto out;
 
+	/* The handlers do not consume the skb. */
+	for_each_input_rcu(rcv_notify_handlers, handler)
+		handler->handler(skb);
+
 	if (skb_cloned(skb) &&
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;

^ permalink raw reply related

* [net-next PATCH 02/02] net/ipv4: VTI support new module for ip_vti.
From: Saurabh @ 2012-06-08 17:32 UTC (permalink / raw)
  To: netdev



New VTI tunnel kernel module, Kconfig and Makefile changes.

Signed-off-by: Saurabh Mohan <saurabh.mohan@vyatta.com>
Reviewed-by: Stephen Hemminger <shemminger@vyatta.com>

---
diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h
index 16b92d0..4b4ce17 100644
--- a/include/linux/if_tunnel.h
+++ b/include/linux/if_tunnel.h
@@ -80,4 +80,15 @@ enum {
 
 #define IFLA_GRE_MAX	(__IFLA_GRE_MAX - 1)
 
+enum {
+	IFLA_VTI_UNSPEC,
+	IFLA_VTI_LINK,
+	IFLA_VTI_IKEY,
+	IFLA_VTI_OKEY,
+	IFLA_VTI_LOCAL,
+	IFLA_VTI_REMOTE,
+	__IFLA_VTI_MAX,
+};
+
+#define IFLA_VTI_MAX	(__IFLA_VTI_MAX - 1)
 #endif /* _IF_TUNNEL_H_ */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 20f1cb5..3a95308 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -310,6 +310,20 @@ config SYN_COOKIES
 
 	  If unsure, say N.
 
+config NET_IPVTI
+    tristate "Virtual (secure) IP: tunneling"
+    select INET_TUNNEL
+    ---help---
+      Tunneling means encapsulating data of one protocol type within
+      another protocol and sending it over a channel that understands the
+      Pencapsulating protocol. This particular tunneling driver implements
+      encapsulation of IP within IP-ESP. This can be used with xfrm to give
+      the notion of a secure tunnel and then use routing protocol on top.
+
+      Saying Y to this option will produce one module ( = code which can
+      be inserted in and removed from the running kernel whenever you
+      want). Most people won't need this and can say N.
+
 config INET_AH
 	tristate "IP: AH transformation"
 	select XFRM_ALGO
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index ff75d3b..3999ce9 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_IP_MROUTE) += ipmr.o
 obj-$(CONFIG_NET_IPIP) += ipip.o
 obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
 obj-$(CONFIG_NET_IPGRE) += ip_gre.o
+obj-$(CONFIG_NET_IPVTI) += ip_vti.o
 obj-$(CONFIG_SYN_COOKIES) += syncookies.o
 obj-$(CONFIG_INET_AH) += ah4.o
 obj-$(CONFIG_INET_ESP) += esp4.o
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
new file mode 100644
index 0000000..3eaa47c
--- /dev/null
+++ b/net/ipv4/ip_vti.c
@@ -0,0 +1,980 @@
+/*
+ *	Linux NET3:	IP/IP protocol decoder modified to support virtual tunnel interface
+ *
+ *	Authors:
+ *		Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
+ *		Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ */
+
+/*
+   This version of net/ipv4/ip_vti.c is cloned of net/ipv4/ipip.c
+
+   For comments look at net/ipv4/ip_gre.c --ANK
+ */
+
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_arp.h>
+#include <linux/mroute.h>
+#include <linux/init.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/if_ether.h>
+
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/ipip.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#define HASH_SIZE  16
+#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+
+static struct rtnl_link_ops vti_link_ops __read_mostly;
+
+static int vti_net_id __read_mostly;
+struct vti_net {
+	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_wc[1];
+	struct ip_tunnel **tunnels[4];
+
+	struct net_device *fb_tunnel_dev;
+};
+
+static int vti_fb_tunnel_init(struct net_device *dev);
+static int vti_tunnel_init(struct net_device *dev);
+static void vti_tunnel_setup(struct net_device *dev);
+static void vti_dev_free(struct net_device *dev);
+static int vti_tunnel_bind_dev(struct net_device *dev);
+
+/*
+ * Locking : hash tables are protected by RCU and RTNL
+ */
+
+#define for_each_ip_tunnel_rcu(start) \
+	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
+
+/* often modified stats are per cpu, other are shared (netdev->stats) */
+struct pcpu_tstats {
+	u64	rx_packets;
+	u64	rx_bytes;
+	u64	tx_packets;
+	u64	tx_bytes;
+	struct	u64_stats_sync	syncp;
+};
+
+#define VTI_XMIT(stats1, stats2) do {				\
+	int err;						\
+	int pkt_len = skb->len;					\
+	err = dst_output(skb);					\
+	if (net_xmit_eval(err) == 0) {				\
+		(stats1)->tx_bytes += pkt_len;			\
+		(stats1)->tx_packets++;				\
+	} else {						\
+		(stats2)->tx_errors++;				\
+		(stats2)->tx_aborted_errors++;			\
+	}							\
+} while (0)
+
+
+static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev,
+					       struct rtnl_link_stats64 *tot)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
+		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+		unsigned int start;
+
+		do {
+			start = u64_stats_fetch_begin_bh(&tstats->syncp);
+			rx_packets = tstats->rx_packets;
+			tx_packets = tstats->tx_packets;
+			rx_bytes = tstats->rx_bytes;
+			tx_bytes = tstats->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+		tot->rx_packets += rx_packets;
+		tot->tx_packets += tx_packets;
+		tot->rx_bytes   += rx_bytes;
+		tot->tx_bytes   += tx_bytes;
+	}
+
+	tot->multicast = dev->stats.multicast;
+	tot->rx_crc_errors = dev->stats.rx_crc_errors;
+	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+	tot->rx_length_errors = dev->stats.rx_length_errors;
+	tot->rx_errors = dev->stats.rx_errors;
+	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+	tot->tx_dropped = dev->stats.tx_dropped;
+	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+	tot->tx_errors = dev->stats.tx_errors;
+
+	return tot;
+}
+
+static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
+					 __be32 remote, __be32 local)
+{
+	unsigned h0 = HASH(remote);
+	unsigned h1 = HASH(local);
+	struct ip_tunnel *t;
+	struct vti_net *ipn = net_generic(net, vti_net_id);
+
+	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
+		if (local == t->parms.iph.saddr &&
+		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+			return t;
+	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
+		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
+			return t;
+
+	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
+		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
+			return t;
+
+	for_each_ip_tunnel_rcu(ipn->tunnels_wc[0])
+		if (t && (t->dev->flags&IFF_UP))
+			return t;
+	return NULL;
+}
+
+static struct ip_tunnel **__vti_bucket(struct vti_net *ipn,
+				     struct ip_tunnel_parm *parms)
+{
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
+	unsigned h = 0;
+	int prio = 0;
+
+	if (remote) {
+		prio |= 2;
+		h ^= HASH(remote);
+	}
+	if (local) {
+		prio |= 1;
+		h ^= HASH(local);
+	}
+	return &ipn->tunnels[prio][h];
+}
+
+static inline struct ip_tunnel **vti_bucket(struct vti_net *ipn,
+					  struct ip_tunnel *t)
+{
+	return __vti_bucket(ipn, &t->parms);
+}
+
+static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
+{
+	struct ip_tunnel __rcu **tp;
+	struct ip_tunnel *iter;
+
+	for (tp = vti_bucket(ipn, t);
+	     (iter = rtnl_dereference(*tp)) != NULL;
+	     tp = &iter->next) {
+		if (t == iter) {
+			rcu_assign_pointer(*tp, t->next);
+			break;
+		}
+	}
+}
+
+static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
+{
+	struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);
+
+	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
+	rcu_assign_pointer(*tp, t);
+}
+
+static struct ip_tunnel *vti_tunnel_locate(struct net *net,
+					 struct ip_tunnel_parm *parms,
+					 int create)
+{
+	__be32 remote = parms->iph.daddr;
+	__be32 local = parms->iph.saddr;
+	struct ip_tunnel *t, *nt;
+	struct ip_tunnel __rcu **tp;
+	struct net_device *dev;
+	char name[IFNAMSIZ];
+	struct vti_net *ipn = net_generic(net, vti_net_id);
+
+	for (tp = __vti_bucket(ipn, parms);
+	     (t = rtnl_dereference(*tp)) != NULL;
+	     tp = &t->next) {
+		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
+			return t;
+	}
+	if (!create)
+		return NULL;
+
+	if (parms->name[0])
+		strlcpy(name, parms->name, IFNAMSIZ);
+	else
+		strcpy(name, "vti%d");
+
+	dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
+	if (dev == NULL)
+		return NULL;
+
+	dev_net_set(dev, net);
+
+	nt = netdev_priv(dev);
+	nt->parms = *parms;
+	dev->rtnl_link_ops = &vti_link_ops;
+
+	vti_tunnel_bind_dev(dev);
+
+	if (register_netdevice(dev) < 0)
+		goto failed_free;
+
+	dev_hold(dev);
+	vti_tunnel_link(ipn, nt);
+	return nt;
+
+ failed_free:
+	free_netdev(dev);
+	return NULL;
+}
+
+static void vti_tunnel_uninit(struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+	struct vti_net *ipn = net_generic(net, vti_net_id);
+
+	if (dev == ipn->fb_tunnel_dev)
+		RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
+	else
+		vti_tunnel_unlink(ipn, netdev_priv(dev));
+	dev_put(dev);
+}
+
+static int vti_err(struct sk_buff *skb, u32 info)
+{
+
+	/* All the routers (except for Linux) return only
+	 * 8 bytes of packet payload. It means, that precise relaying of
+	 * ICMP in the real Internet is absolutely infeasible.
+	 */
+	struct iphdr *iph = (struct iphdr *)skb->data;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
+	struct ip_tunnel *t;
+	int err;
+
+	switch (type) {
+	default:
+	case ICMP_PARAMETERPROB:
+		return 0;
+
+	case ICMP_DEST_UNREACH:
+		switch (code) {
+		case ICMP_SR_FAILED:
+		case ICMP_PORT_UNREACH:
+			/* Impossible event. */
+			return 0;
+		case ICMP_FRAG_NEEDED:
+			/* Soft state for pmtu is maintained by IP core. */
+			return 0;
+		default:
+			/* All others are translated to HOST_UNREACH. */
+			break;
+		}
+		break;
+	case ICMP_TIME_EXCEEDED:
+		if (code != ICMP_EXC_TTL)
+			return 0;
+		break;
+	}
+
+	err = -ENOENT;
+
+	rcu_read_lock();
+	t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
+	if (t == NULL || t->parms.iph.daddr == 0)
+		goto out;
+
+	err = 0;
+	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+		goto out;
+
+	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
+		t->err_count++;
+	else
+		t->err_count = 1;
+	t->err_time = jiffies;
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+static inline void vti_ecn_decapsulate(const struct iphdr *outer_iph,
+					struct sk_buff *skb)
+{
+	struct iphdr *inner_iph = ip_hdr(skb);
+
+	if (INET_ECN_is_ce(outer_iph->tos))
+		IP_ECN_set_ce(inner_iph);
+}
+
+/*
+ * We dont digest the packet therefore let the packet pass.
+ */
+static int vti_rcv(struct sk_buff *skb)
+{
+	struct ip_tunnel *tunnel;
+	const struct iphdr *iph = ip_hdr(skb);
+
+	rcu_read_lock();
+	tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
+	if (tunnel != NULL) {
+		struct pcpu_tstats *tstats;
+
+		tstats = this_cpu_ptr(tunnel->dev->tstats);
+		tstats->rx_packets++;
+		tstats->rx_bytes += skb->len;
+
+		skb->dev = tunnel->dev;
+		skb_dst_drop(skb);
+		nf_reset(skb);
+		rcu_read_unlock();
+		/* We do not eat the packet here therefore return 1 */
+		return 1;
+	}
+	rcu_read_unlock();
+
+	return -1;
+}
+
+/*
+ *	This function assumes it is being called from dev_queue_xmit()
+ *	and that skb is filled properly by that function.
+ */
+
+static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct pcpu_tstats *tstats;
+	struct net_device_stats *stats = &tunnel->dev->stats;
+	struct iphdr  *tiph = &tunnel->parms.iph;
+	u8     tos = tunnel->parms.iph.tos;
+	struct rtable *rt;		/* Route to the other host */
+	struct net_device *tdev;	/* Device to other host */
+	struct iphdr  *old_iph = ip_hdr(skb);
+	__be32 dst = tiph->daddr;
+	struct flowi4 fl4;
+
+	if (skb->protocol != htons(ETH_P_IP))
+		goto tx_error;
+
+	if (tos&1)
+		tos = old_iph->tos;
+
+	if (!dst) {
+		/* NBMA tunnel */
+		rt = skb_rtable(skb);
+		if (rt == NULL) {
+			stats->tx_fifo_errors++;
+			goto tx_error;
+		}
+		dst = rt->rt_gateway;
+		if (dst == 0)
+			goto tx_error_icmp;
+	}
+
+	memset(&fl4, 0, sizeof(fl4));
+	flowi4_init_output(&fl4, tunnel->parms.link,
+		htonl(tunnel->parms.i_key), RT_TOS(tos), RT_SCOPE_UNIVERSE,
+		IPPROTO_IPIP, 0,
+		dst, tiph->saddr, 0, 0);
+	rt = ip_route_output_key(dev_net(dev), &fl4);
+	if (IS_ERR(rt)) {
+		dev->stats.tx_carrier_errors++;
+		goto tx_error_icmp;
+	}
+#ifdef CONFIG_XFRM
+		/* if there is no transform then this tunnel is not functional. */
+		if (!rt->dst.xfrm) {
+			stats->tx_carrier_errors++;
+			goto tx_error_icmp;
+		}
+#endif
+	tdev = rt->dst.dev;
+
+	if (tdev == dev) {
+		ip_rt_put(rt);
+		stats->collisions++;
+		goto tx_error;
+
+	}
+
+
+	if (tunnel->err_count > 0) {
+		if (time_before(jiffies,
+				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
+			tunnel->err_count--;
+			dst_link_failure(skb);
+		} else
+			tunnel->err_count = 0;
+	}
+
+
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
+			      IPSKB_REROUTED);
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &rt->dst);
+	nf_reset(skb);
+	skb->dev = skb_dst(skb)->dev;
+
+	tstats = this_cpu_ptr(dev->tstats);
+	VTI_XMIT(tstats, &dev->stats);
+	return NETDEV_TX_OK;
+
+tx_error_icmp:
+	dst_link_failure(skb);
+tx_error:
+	stats->tx_errors++;
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static int vti_tunnel_bind_dev(struct net_device *dev)
+{
+	struct net_device *tdev = NULL;
+	struct ip_tunnel *tunnel;
+	struct iphdr *iph;
+
+	tunnel = netdev_priv(dev);
+	iph = &tunnel->parms.iph;
+
+	if (iph->daddr) {
+		struct rtable *rt;
+		struct flowi4 fl4;
+		memset(&fl4, 0, sizeof(fl4));
+		flowi4_init_output(&fl4, tunnel->parms.link,
+				htonl(tunnel->parms.i_key), RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
+				IPPROTO_IPIP, 0,
+				iph->daddr, iph->saddr, 0, 0);
+		rt = ip_route_output_key(dev_net(dev), &fl4);
+		if (!IS_ERR(rt)) {
+			tdev = rt->dst.dev;
+			ip_rt_put(rt);
+		}
+		dev->flags |= IFF_POINTOPOINT;
+	}
+
+	if (!tdev && tunnel->parms.link)
+		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
+
+	if (tdev) {
+		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+		dev->mtu = tdev->mtu;
+	}
+	dev->iflink = tunnel->parms.link;
+	return dev->mtu;
+}
+
+static int
+vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	int err = 0;
+	struct ip_tunnel_parm p;
+	struct ip_tunnel *t;
+	struct net *net = dev_net(dev);
+	struct vti_net *ipn = net_generic(net, vti_net_id);
+
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		t = NULL;
+		if (dev == ipn->fb_tunnel_dev) {
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+				err = -EFAULT;
+				break;
+			}
+			t = vti_tunnel_locate(net, &p, 0);
+		}
+		if (t == NULL)
+			t = netdev_priv(dev);
+		memcpy(&p, &t->parms, sizeof(p));
+		p.i_flags |= GRE_KEY;
+		p.o_flags |= GRE_KEY;
+		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+			err = -EFAULT;
+		break;
+
+	case SIOCADDTUNNEL:
+	case SIOCCHGTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		err = -EFAULT;
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			goto done;
+
+		err = -EINVAL;
+		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_ESP ||
+		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
+			goto done;
+		if (p.iph.ttl)
+			p.iph.frag_off |= htons(IP_DF);
+
+		t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
+
+		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
+			if (t != NULL) {
+				if (t->dev != dev) {
+					err = -EEXIST;
+					break;
+				}
+			} else {
+				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
+				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
+					err = -EINVAL;
+					break;
+				}
+				t = netdev_priv(dev);
+				vti_tunnel_unlink(ipn, t);
+				synchronize_net();
+				t->parms.iph.saddr = p.iph.saddr;
+				t->parms.iph.daddr = p.iph.daddr;
+				t->parms.i_key = p.i_key;
+				t->parms.o_key = p.o_key;
+				t->parms.iph.protocol = IPPROTO_ESP;
+				memcpy(dev->dev_addr, &p.iph.saddr, 4);
+				memcpy(dev->broadcast, &p.iph.daddr, 4);
+				vti_tunnel_link(ipn, t);
+				netdev_state_change(dev);
+			}
+		}
+
+		if (t) {
+			err = 0;
+			if (cmd == SIOCCHGTUNNEL) {
+				t->parms.iph.ttl = p.iph.ttl;
+				t->parms.iph.tos = p.iph.tos;
+				t->parms.iph.frag_off = p.iph.frag_off;
+				t->parms.i_key = p.i_key;
+				t->parms.o_key = p.o_key;
+				if (t->parms.link != p.link) {
+					t->parms.link = p.link;
+					vti_tunnel_bind_dev(dev);
+					netdev_state_change(dev);
+				}
+			}
+			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+				err = -EFAULT;
+		} else
+			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+		break;
+
+	case SIOCDELTUNNEL:
+		err = -EPERM;
+		if (!capable(CAP_NET_ADMIN))
+			goto done;
+
+		if (dev == ipn->fb_tunnel_dev) {
+			err = -EFAULT;
+			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+				goto done;
+			err = -ENOENT;
+
+			t = vti_tunnel_locate(net, &p, 0);
+			if (t == NULL)
+				goto done;
+			err = -EPERM;
+			if (t->dev == ipn->fb_tunnel_dev)
+				goto done;
+			dev = t->dev;
+		}
+		unregister_netdevice(dev);
+		err = 0;
+		break;
+
+	default:
+		err = -EINVAL;
+	}
+
+done:
+	return err;
+}
+
+static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu < 68 || new_mtu > 0xFFF8)
+		return -EINVAL;
+	dev->mtu = new_mtu;
+	return 0;
+}
+
+static const struct net_device_ops vti_netdev_ops = {
+	.ndo_init	= vti_tunnel_init,
+	.ndo_uninit	= vti_tunnel_uninit,
+	.ndo_start_xmit	= vti_tunnel_xmit,
+	.ndo_do_ioctl	= vti_tunnel_ioctl,
+	.ndo_change_mtu	= vti_tunnel_change_mtu,
+	.ndo_get_stats64  = vti_get_stats64,
+};
+
+static void vti_dev_free(struct net_device *dev)
+{
+	free_percpu(dev->tstats);
+	free_netdev(dev);
+}
+
+static void vti_tunnel_setup(struct net_device *dev)
+{
+	dev->netdev_ops		= &vti_netdev_ops;
+	dev->destructor		= vti_dev_free;
+
+	dev->type		= ARPHRD_TUNNEL;
+	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
+	dev->mtu		= ETH_DATA_LEN;
+	dev->flags		= IFF_NOARP;
+	dev->iflink		= 0;
+	dev->addr_len		= 4;
+	dev->features		|= NETIF_F_NETNS_LOCAL;
+	dev->features		|= NETIF_F_LLTX;
+	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
+}
+
+static int vti_tunnel_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
+	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+
+	dev->tstats = alloc_percpu(struct pcpu_tstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int __net_init vti_fb_tunnel_init(struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct iphdr *iph = &tunnel->parms.iph;
+	struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);
+
+	tunnel->dev = dev;
+	strcpy(tunnel->parms.name, dev->name);
+
+	iph->version		= 4;
+	iph->protocol		= IPPROTO_ESP;
+	iph->ihl		= 5;
+
+	dev->tstats = alloc_percpu(struct pcpu_tstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	dev_hold(dev);
+	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
+	return 0;
+}
+
+static struct xfrm_tunnel vti_handler __read_mostly = {
+	.handler	=	vti_rcv,
+	.err_handler	=	vti_err,
+	.priority	=	1,
+};
+
+static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
+{
+	int prio;
+
+	for (prio = 1; prio < 4; prio++) {
+		int h;
+		for (h = 0; h < HASH_SIZE; h++) {
+			struct ip_tunnel *t;
+
+			t = rtnl_dereference(ipn->tunnels[prio][h]);
+			while (t != NULL) {
+				unregister_netdevice_queue(t->dev, head);
+				t = rtnl_dereference(t->next);
+			}
+		}
+	}
+}
+
+static int __net_init vti_init_net(struct net *net)
+{
+	int err;
+	struct vti_net *ipn = net_generic(net, vti_net_id);
+
+	ipn->tunnels[0] = ipn->tunnels_wc;
+	ipn->tunnels[1] = ipn->tunnels_l;
+	ipn->tunnels[2] = ipn->tunnels_r;
+	ipn->tunnels[3] = ipn->tunnels_r_l;
+
+	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
+					   "ip_vti0",
+					   vti_tunnel_setup);
+	if (!ipn->fb_tunnel_dev) {
+		err = -ENOMEM;
+		goto err_alloc_dev;
+	}
+	dev_net_set(ipn->fb_tunnel_dev, net);
+
+	err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
+	if (err)
+		goto err_reg_dev;
+	ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;
+
+	err = register_netdev(ipn->fb_tunnel_dev);
+	if (err)
+		goto err_reg_dev;
+	return 0;
+
+err_reg_dev:
+	vti_dev_free(ipn->fb_tunnel_dev);
+err_alloc_dev:
+	/* nothing */
+	return err;
+}
+
+static void __net_exit vti_exit_net(struct net *net)
+{
+	struct vti_net *ipn = net_generic(net, vti_net_id);
+	LIST_HEAD(list);
+
+	rtnl_lock();
+	vti_destroy_tunnels(ipn, &list);
+	unregister_netdevice_many(&list);
+	rtnl_unlock();
+}
+
+static struct pernet_operations vti_net_ops = {
+	.init = vti_init_net,
+	.exit = vti_exit_net,
+	.id   = &vti_net_id,
+	.size = sizeof(struct vti_net),
+};
+
+static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	return 0;
+}
+
+static void vti_netlink_parms(struct nlattr *data[],
+				struct ip_tunnel_parm *parms)
+{
+	memset(parms, 0, sizeof(*parms));
+
+	parms->iph.protocol = IPPROTO_ESP;
+
+	if (!data)
+		return;
+
+	if (data[IFLA_VTI_LINK])
+		parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
+
+	if (data[IFLA_VTI_IKEY])
+		parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
+
+	if (data[IFLA_VTI_OKEY])
+		parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
+
+	if (data[IFLA_VTI_LOCAL])
+		parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);
+
+	if (data[IFLA_VTI_REMOTE])
+		parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);
+
+}
+
+static int vti_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
+			 struct nlattr *data[])
+{
+	struct ip_tunnel *nt;
+	struct net *net = dev_net(dev);
+	struct vti_net *ipn = net_generic(net, vti_net_id);
+	int mtu;
+	int err;
+
+	nt = netdev_priv(dev);
+	vti_netlink_parms(data, &nt->parms);
+
+	if (vti_tunnel_locate(net, &nt->parms, 0))
+		return -EEXIST;
+
+	mtu = vti_tunnel_bind_dev(dev);
+	if (!tb[IFLA_MTU])
+		dev->mtu = mtu;
+
+	err = register_netdevice(dev);
+	if (err)
+		goto out;
+
+	dev_hold(dev);
+	vti_tunnel_link(ipn, nt);
+
+out:
+	return err;
+	return 0;
+}
+
+static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
+			    struct nlattr *data[])
+{
+	struct ip_tunnel *t, *nt;
+	struct net *net = dev_net(dev);
+	struct vti_net *ipn = net_generic(net, vti_net_id);
+	struct ip_tunnel_parm p;
+	int mtu;
+
+	if (dev == ipn->fb_tunnel_dev)
+		return -EINVAL;
+
+	nt = netdev_priv(dev);
+	vti_netlink_parms(data, &p);
+
+	t = vti_tunnel_locate(net, &p, 0);
+
+	if (t) {
+		if (t->dev != dev)
+			return -EEXIST;
+	} else {
+		t = nt;
+
+		vti_tunnel_unlink(ipn, t);
+		t->parms.iph.saddr = p.iph.saddr;
+		t->parms.iph.daddr = p.iph.daddr;
+		t->parms.i_key = p.i_key;
+		t->parms.o_key = p.o_key;
+		if (dev->type != ARPHRD_ETHER) {
+			memcpy(dev->dev_addr, &p.iph.saddr, 4);
+			memcpy(dev->broadcast, &p.iph.daddr, 4);
+		}
+		vti_tunnel_link(ipn, t);
+		netdev_state_change(dev);
+	}
+
+	if (t->parms.link != p.link) {
+		t->parms.link = p.link;
+		mtu = vti_tunnel_bind_dev(dev);
+		if (!tb[IFLA_MTU])
+			dev->mtu = mtu;
+		netdev_state_change(dev);
+	}
+
+	return 0;
+}
+
+static size_t vti_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_VTI_LINK */
+		nla_total_size(4) +
+		/* IFLA_VTI_IKEY */
+		nla_total_size(4) +
+		/* IFLA_VTI_OKEY */
+		nla_total_size(4) +
+		/* IFLA_VTI_LOCAL */
+		nla_total_size(4) +
+		/* IFLA_VTI_REMOTE */
+		nla_total_size(4) +
+		0;
+}
+
+static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct ip_tunnel_parm *p = &t->parms;
+
+	nla_put_u32(skb, IFLA_VTI_LINK, p->link);
+	nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
+	nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
+	nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
+	nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);
+
+	return 0;
+}
+
+static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
+	[IFLA_VTI_LINK]		= { .type = NLA_U32 },
+	[IFLA_VTI_IKEY]		= { .type = NLA_U32 },
+	[IFLA_VTI_OKEY]		= { .type = NLA_U32 },
+	[IFLA_VTI_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
+	[IFLA_VTI_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+};
+
+static struct rtnl_link_ops vti_link_ops __read_mostly = {
+	.kind		= "vti",
+	.maxtype	= IFLA_VTI_MAX,
+	.policy		= vti_policy,
+	.priv_size	= sizeof(struct ip_tunnel),
+	.setup		= vti_tunnel_setup,
+	.validate	= vti_tunnel_validate,
+	.newlink	= vti_newlink,
+	.changelink	= vti_changelink,
+	.get_size	= vti_get_size,
+	.fill_info	= vti_fill_info,
+};
+
+static int __init vti_init(void)
+{
+	int err;
+
+	pr_info("IPv4 over ESP tunneling driver v4\n");
+
+	err = register_pernet_device(&vti_net_ops);
+	if (err < 0)
+		return err;
+	err = xfrm4_mode_tunnel_input_register(&vti_handler);
+	if (err < 0) {
+		unregister_pernet_device(&vti_net_ops);
+		pr_info(KERN_INFO "vti init: can't register tunnel\n");
+	}
+
+	err = rtnl_link_register(&vti_link_ops);
+	if (err < 0)
+		goto rtnl_link_failed;
+
+	return err;
+
+rtnl_link_failed:
+	xfrm4_mode_tunnel_input_deregister(&vti_handler);
+	unregister_pernet_device(&vti_net_ops);
+	return err;
+}
+
+static void __exit vti_fini(void)
+{
+	rtnl_link_unregister(&vti_link_ops);
+	if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
+		pr_info("vti close: can't deregister tunnel\n");
+
+	unregister_pernet_device(&vti_net_ops);
+}
+
+module_init(vti_init);
+module_exit(vti_fini);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("vti");
+MODULE_ALIAS_NETDEV("ip_vti0");

^ permalink raw reply related

* [PATCH 00/02] iproute2: Add support for new tunnel type VTI.
From: Saurabh @ 2012-06-08 17:33 UTC (permalink / raw)
  To: netdev

Introduction:
Virtual tunnel interface is a way to represent policy based IPsec tunnels as virtual interfaces in linux. This is similar to Cisco's VTI (virtual tunnel interface) and Juniper's representaion of secure tunnel (st.xx). The advantage of representing an IPsec tunnel as an interface is that it is possible to plug Ipsec tunnels into the routing protocol infrastructure of a router. Therefore it becomes possible to influence the packet path by toggling the link state of the tunnel or based on routing metrics.

Overview:
Natively linux kernel does not support ipsec as an interface. Also secure interface assume a ipsec policy 4 tupple of {dst-ip-any, src-ip-any, dst-port-any, src-port-any}. Applying this 4 tuple in linux would result in all traffic matching the ipsec policy. What is needed is a tunnel distinguisher. The linux kernel skbuff has fwmark which is used for policy based routing (PBR). Linux kernel version 2.6.35 enhanced SPD/SADB to use fwmark as part of the IPsec policy. Strongswan has also introduced support for this kernel feature with version 4.5.0. We can therefore use the fwmark as the distinguisher for tunnel interface. We can also create a light weight tunnel kernel module (vti) to give the notion of an interface for rest of the kernel routing system. The tunnel module does not do any enc
 apsulation/decapsulation. The kernel's xfrm modules still do the esp encryption/decryption. 

Enhancement to iproute2:
Add support to configure and display VTI tunnel using ioctl and rtnetlink.

Usage:
ip tunnel add sti15 mode vti remote 12.0.0.1 local 12.0.0.3 ikey 15
or
ip link add sti15 type vti key 15 remote 12.0.0.1 local 12.0.0.3

Signed-off-by: Saurabh Mohan <saurabh.mohan@vyatta.com>

---

^ permalink raw reply

* [PATCH 01/02] iproute2: VTI support for ip tunnel command.
From: Saurabh @ 2012-06-08 17:33 UTC (permalink / raw)
  To: netdev



Configure VTI using 'ip tunnel'.

Signed-off-by: Saurabh Mohan <saurabh.mohan@vyatta.com>

---
diff --git a/ip/iptunnel.c b/ip/iptunnel.c
index 38ccd87..c054b7e 100644
--- a/ip/iptunnel.c
+++ b/ip/iptunnel.c
@@ -33,7 +33,7 @@ static void usage(void) __attribute__((noreturn));
 static void usage(void)
 {
 	fprintf(stderr, "Usage: ip tunnel { add | change | del | show | prl | 6rd } [ NAME ]\n");
-	fprintf(stderr, "          [ mode { ipip | gre | sit | isatap } ] [ remote ADDR ] [ local ADDR ]\n");
+	fprintf(stderr, "          [ mode { ipip | gre | sit | isatap | esp } ] [ remote ADDR ] [ local ADDR ]\n");
 	fprintf(stderr, "          [ [i|o]seq ] [ [i|o]key KEY ] [ [i|o]csum ]\n");
 	fprintf(stderr, "          [ prl-default ADDR ] [ prl-nodefault ADDR ] [ prl-delete ADDR ]\n");
 	fprintf(stderr, "          [ 6rd-prefix ADDR ] [ 6rd-relay_prefix ADDR ] [ 6rd-reset ]\n");
@@ -94,6 +94,12 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
 				}
 				p->iph.protocol = IPPROTO_IPV6;
 				isatap++;
+			} else if (strcmp(*argv, "esp") == 0) {
+				if (p->iph.protocol && p->iph.protocol != IPPROTO_ESP) {
+					fprintf(stderr, "You managed to ask for more than one tunnel mode.\n");
+					exit(-1);
+				}
+				p->iph.protocol = IPPROTO_ESP;
 			} else {
 				fprintf(stderr,"Cannot guess tunnel mode.\n");
 				exit(-1);
@@ -220,6 +226,8 @@ static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
 		else if (memcmp(p->name, "isatap", 6) == 0) {
 			p->iph.protocol = IPPROTO_IPV6;
 			isatap++;
+		} else if (memcmp(p->name, "esp", 3) == 0) {
+			p->iph.protocol = IPPROTO_ESP;
 		}
 	}
 
@@ -274,8 +282,10 @@ static int do_add(int cmd, int argc, char **argv)
 		return tnl_add_ioctl(cmd, "gre0", p.name, &p);
 	case IPPROTO_IPV6:
 		return tnl_add_ioctl(cmd, "sit0", p.name, &p);
+	case IPPROTO_ESP:
+		return tnl_add_ioctl(cmd, "ip_vti0", p.name, &p);
 	default:
-		fprintf(stderr, "cannot determine tunnel mode (ipip, gre or sit)\n");
+		fprintf(stderr, "cannot determine tunnel mode (ipip, gre, esp or sit)\n");
 		return -1;
 	}
 	return -1;
@@ -295,6 +305,8 @@ static int do_del(int argc, char **argv)
 		return tnl_del_ioctl("gre0", p.name, &p);
 	case IPPROTO_IPV6:
 		return tnl_del_ioctl("sit0", p.name, &p);
+	case IPPROTO_ESP:
+		return tnl_del_ioctl("ip_vti0", p.name, &p);
 	default:
 		return tnl_del_ioctl(p.name, p.name, &p);
 	}
@@ -487,6 +499,9 @@ static int do_show(int argc, char **argv)
 	case IPPROTO_IPV6:
 		err = tnl_get_ioctl(p.name[0] ? p.name : "sit0", &p);
 		break;
+	case IPPROTO_ESP:
+		err = tnl_get_ioctl(p.name[0] ? p.name : "ip_vti0", &p);
+		break;
 	default:
 		do_tunnels_list(&p);
 		return 0;
diff --git a/ip/tunnel.c b/ip/tunnel.c
index b176d3f..8544581 100644
--- a/ip/tunnel.c
+++ b/ip/tunnel.c
@@ -52,6 +52,9 @@ const char *tnl_strproto(__u8 proto)
 	case IPPROTO_IPV6:
 		strcpy(buf, "ipv6");
 		break;
+	case IPPROTO_ESP:
+		strcpy(buf, "esp");
+		break;
 	case 0:
 		strcpy(buf, "any");
 		break;

^ permalink raw reply related

* [PATCH 02/02] iproute2: VTI support for ip link command.
From: Saurabh @ 2012-06-08 17:33 UTC (permalink / raw)
  To: netdev



Support for VTI via rt netlink.

Signed-off-by: Saurabh Mohan <saurabh.mohan@vyatta.com>

---
diff --git a/ip/link_vti.c b/ip/link_vti.c
new file mode 100644
index 0000000..385f435
--- /dev/null
+++ b/ip/link_vti.c
@@ -0,0 +1,245 @@
+/*
+ * link_vti.c	VTI driver module
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Herbert Xu <herbert@gondor.apana.org.au>
+ *          Saurabh Mohan <saurabh.mohan@vyatta.com> Modified link_gre.c for VTI
+ */
+
+#include <string.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <linux/ip.h>
+#include <linux/if_tunnel.h>
+#include "rt_names.h"
+#include "utils.h"
+#include "ip_common.h"
+#include "tunnel.h"
+
+
+static void usage(void) __attribute__((noreturn));
+static void usage(void)
+{
+	fprintf(stderr, "Usage: ip link { add | set | change | replace | del } NAME\n");
+	fprintf(stderr, "          type { vti } [ remote ADDR ] [ local ADDR ]\n");
+	fprintf(stderr, "          [ [i|o]key KEY ]\n");
+	fprintf(stderr, "          [ dev PHYS_DEV ]\n");
+	fprintf(stderr, "\n");
+	fprintf(stderr, "Where: NAME := STRING\n");
+	fprintf(stderr, "       ADDR := { IP_ADDRESS }\n");
+	fprintf(stderr, "       KEY  := { DOTTED_QUAD | NUMBER }\n");
+	exit(-1);
+}
+
+static int vti_parse_opt(struct link_util *lu, int argc, char **argv,
+			 struct nlmsghdr *n)
+{
+	struct {
+		struct nlmsghdr n;
+		struct ifinfomsg i;
+		char buf[1024];
+	} req;
+	struct ifinfomsg *ifi = (struct ifinfomsg *)(n + 1);
+	struct rtattr *tb[IFLA_MAX + 1];
+	struct rtattr *linkinfo[IFLA_INFO_MAX+1];
+	struct rtattr *vtiinfo[IFLA_VTI_MAX + 1];
+	unsigned ikey = 0;
+	unsigned okey = 0;
+	unsigned saddr = 0;
+	unsigned daddr = 0;
+	unsigned link = 0;
+	int len;
+
+	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
+		memset(&req, 0, sizeof(req));
+
+		req.n.nlmsg_len = NLMSG_LENGTH(sizeof(*ifi));
+		req.n.nlmsg_flags = NLM_F_REQUEST;
+		req.n.nlmsg_type = RTM_GETLINK;
+		req.i.ifi_family = preferred_family;
+		req.i.ifi_index = ifi->ifi_index;
+
+		if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) {
+get_failed:
+			fprintf(stderr,
+				"Failed to get existing tunnel info.\n");
+			return -1;
+		}
+
+		len = req.n.nlmsg_len;
+		len -= NLMSG_LENGTH(sizeof(*ifi));
+		if (len < 0)
+			goto get_failed;
+
+		parse_rtattr(tb, IFLA_MAX, IFLA_RTA(&req.i), len);
+
+		if (!tb[IFLA_LINKINFO])
+			goto get_failed;
+
+		parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO]);
+
+		if (!linkinfo[IFLA_INFO_DATA])
+			goto get_failed;
+
+		parse_rtattr_nested(vtiinfo, IFLA_VTI_MAX,
+				    linkinfo[IFLA_INFO_DATA]);
+
+		if (vtiinfo[IFLA_VTI_IKEY])
+			ikey = *(__u32 *)RTA_DATA(vtiinfo[IFLA_VTI_IKEY]);
+
+		if (vtiinfo[IFLA_VTI_OKEY])
+			okey = *(__u32 *)RTA_DATA(vtiinfo[IFLA_VTI_OKEY]);
+
+		if (vtiinfo[IFLA_VTI_LOCAL])
+			saddr = *(__u32 *)RTA_DATA(vtiinfo[IFLA_VTI_LOCAL]);
+
+		if (vtiinfo[IFLA_VTI_REMOTE])
+			daddr = *(__u32 *)RTA_DATA(vtiinfo[IFLA_VTI_REMOTE]);
+
+		if (vtiinfo[IFLA_VTI_LINK])
+			link = *(__u8 *)RTA_DATA(vtiinfo[IFLA_VTI_LINK]);
+	}
+
+	while (argc > 0) {
+		if (!matches(*argv, "key")) {
+			unsigned uval;
+
+			NEXT_ARG();
+			if (strchr(*argv, '.'))
+				uval = get_addr32(*argv);
+			else {
+				if (get_unsigned(&uval, *argv, 0) < 0) {
+					fprintf(stderr,
+						"Invalid value for \"key\"\n");
+					exit(-1);
+				}
+				uval = htonl(uval);
+			}
+
+			ikey = okey = uval;
+		} else if (!matches(*argv, "ikey")) {
+			unsigned uval;
+
+			NEXT_ARG();
+			if (strchr(*argv, '.'))
+				uval = get_addr32(*argv);
+			else {
+				if (get_unsigned(&uval, *argv, 0) < 0) {
+					fprintf(stderr, "invalid value of \"ikey\"\n");
+					exit(-1);
+				}
+				uval = htonl(uval);
+			}
+			ikey = uval;
+		} else if (!matches(*argv, "okey")) {
+			unsigned uval;
+
+			NEXT_ARG();
+			if (strchr(*argv, '.'))
+				uval = get_addr32(*argv);
+			else {
+				if (get_unsigned(&uval, *argv, 0) < 0) {
+					fprintf(stderr, "invalid value of \"okey\"\n");
+					exit(-1);
+				}
+				uval = htonl(uval);
+			}
+			okey = uval;
+		} else if (!matches(*argv, "remote")) {
+			NEXT_ARG();
+			if (!strcmp(*argv, "any")) {
+				fprintf(stderr, "invalid value of \"remote\"\n");
+				exit(-1);
+			} else {
+				daddr = get_addr32(*argv);
+			}
+		} else if (!matches(*argv, "local")) {
+			NEXT_ARG();
+			if (!strcmp(*argv, "any")) {
+				fprintf(stderr, "invalid value of \"local\"\n");
+				exit(-1);
+			} else {
+				saddr = get_addr32(*argv);
+			}
+		} else if (!matches(*argv, "dev")) {
+			NEXT_ARG();
+			link = if_nametoindex(*argv);
+			if (link == 0)
+				exit(-1);
+		} else
+			usage();
+		argc--; argv++;
+	}
+
+	addattr32(n, 1024, IFLA_VTI_IKEY, ikey);
+	addattr32(n, 1024, IFLA_VTI_OKEY, okey);
+	addattr_l(n, 1024, IFLA_VTI_LOCAL, &saddr, 4);
+	addattr_l(n, 1024, IFLA_VTI_REMOTE, &daddr, 4);
+	if (link)
+		addattr32(n, 1024, IFLA_VTI_LINK, link);
+
+	return 0;
+}
+
+static void vti_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
+{
+	char s1[1024];
+	char s2[64];
+	const char *local = "any";
+	const char *remote = "any";
+
+	if (!tb)
+		return;
+
+	if (tb[IFLA_VTI_REMOTE]) {
+		unsigned addr = *(__u32 *)RTA_DATA(tb[IFLA_VTI_REMOTE]);
+
+		if (addr)
+			remote = format_host(AF_INET, 4, &addr, s1, sizeof(s1));
+	}
+
+	fprintf(f, "remote %s ", remote);
+
+	if (tb[IFLA_VTI_LOCAL]) {
+		unsigned addr = *(__u32 *)RTA_DATA(tb[IFLA_VTI_LOCAL]);
+
+		if (addr)
+			local = format_host(AF_INET, 4, &addr, s1, sizeof(s1));
+	}
+
+	fprintf(f, "local %s ", local);
+
+	if (tb[IFLA_VTI_LINK] && *(__u32 *)RTA_DATA(tb[IFLA_VTI_LINK])) {
+		unsigned link = *(__u32 *)RTA_DATA(tb[IFLA_VTI_LINK]);
+		const char *n = if_indextoname(link, s2);
+
+		if (n)
+			fprintf(f, "dev %s ", n);
+		else
+			fprintf(f, "dev %u ", link);
+	}
+
+	if (tb[IFLA_VTI_IKEY]) {
+		inet_ntop(AF_INET, RTA_DATA(tb[IFLA_VTI_IKEY]), s2, sizeof(s2));
+		fprintf(f, "ikey %s ", s2);
+	}
+
+	if (tb[IFLA_VTI_OKEY]) {
+		inet_ntop(AF_INET, RTA_DATA(tb[IFLA_VTI_OKEY]), s2, sizeof(s2));
+		fprintf(f, "okey %s ", s2);
+	}
+}
+
+struct link_util vti_link_util = {
+	.id = "vti",
+	.maxattr = IFLA_VTI_MAX,
+	.parse_opt = vti_parse_opt,
+	.print_opt = vti_print_opt,
+};
diff --git a/ip/Makefile b/ip/Makefile
index e029ea1..6a518f8 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -3,7 +3,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
     ipmaddr.o ipmonitor.o ipmroute.o ipprefix.o iptuntap.o \
     ipxfrm.o xfrm_state.o xfrm_policy.o xfrm_monitor.o \
     iplink_vlan.o link_veth.o link_gre.o iplink_can.o \
-    iplink_macvlan.o iplink_macvtap.o ipl2tp.o
+    iplink_macvlan.o iplink_macvtap.o ipl2tp.o link_vti.o
 
 RTMONOBJ=rtmon.o
 

^ permalink raw reply related

* Re: Reoccuring kern.log events after running xl2tp with ethernet adapter Realtek 8111E
From: Dustin Schumm @ 2012-06-08 17:56 UTC (permalink / raw)
  To: Francois Romieu; +Cc: netdev
In-Reply-To: <20120606222529.GA13420@electric-eye.fr.zoreil.com>

On Wed, Jun 6, 2012 at 6:25 PM, Francois Romieu <romieu@fr.zoreil.com> wrote:
> Dustin Schumm <shodid@gmail.com> :
> [...]
>> Do you see any indication if this is a problem with the mainline
>> kernel, distro kernel, driver, or otherwise? It's not definitive, been
>> I've been good using kernels 2.6.x and getting errors on everything
>> 3.x I have tried. I am looking for some direction to pursue. Thanks.
>
> See http://marc.info/?l=linux-kernel&m=133897670115862&w=2
>
> --
> Ueimor

After further investigation of "EIP is at aesni_cbc_dec"
I changed the ipsec encryption to 3des and l2tp is working properly
now under kernels 3.x
It is strange that the aes module errors do not occur when used with
ipsec alone. It is not until l2tp is initiated that the errors start.

^ permalink raw reply

* Re: [PATCH 1/1 v2] Ethtool: Add EEE support
From: Ben Hutchings @ 2012-06-08 18:11 UTC (permalink / raw)
  To: Yuval Mintz; +Cc: netdev, eilong, peppe.cavallaro
In-Reply-To: <1339010124-23413-1-git-send-email-yuvalmin@broadcom.com>

On Wed, 2012-06-06 at 22:15 +0300, Yuval Mintz wrote:
> This patch adds 2 new ethtool commands which can be
> used to manipulate network interfaces' support in
> EEE.
[...]
> @@ -3423,6 +3514,12 @@ static const struct option {
>  	  "		[ hex on|off ]\n"
>  	  "		[ offset N ]\n"
>  	  "		[ length N ]\n" },
> +	{ "--get-eee", 1, do_geee, "Get EEE settings"},
> +	{ "--set-eee", 1, do_seee, "Set EEE settings",
> +	  "		[ eee on|off ]\n"
> +	  "		[ advertise %x ]\n"
> +	  "		[ tx-lpi on|off ]\n"
> +	  "		[ tx-timer %d ]\n"},
[...]

Another thing: the long options are generally --foo to set/change foo
settings and --show-foo to get/show foo settings.  Please rename these
options to match.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* Re: [PATCH 1/1 v2] Ethtool: Add EEE support
From: Ben Hutchings @ 2012-06-08 18:14 UTC (permalink / raw)
  To: Yuval Mintz; +Cc: netdev, eilong, peppe.cavallaro
In-Reply-To: <1339179089.2648.5.camel@bwh-desktop.uk.solarflarecom.com>

On Fri, 2012-06-08 at 19:11 +0100, Ben Hutchings wrote:
> On Wed, 2012-06-06 at 22:15 +0300, Yuval Mintz wrote:
> > This patch adds 2 new ethtool commands which can be
> > used to manipulate network interfaces' support in
> > EEE.
> [...]
> > @@ -3423,6 +3514,12 @@ static const struct option {
> >  	  "		[ hex on|off ]\n"
> >  	  "		[ offset N ]\n"
> >  	  "		[ length N ]\n" },
> > +	{ "--get-eee", 1, do_geee, "Get EEE settings"},
> > +	{ "--set-eee", 1, do_seee, "Set EEE settings",
> > +	  "		[ eee on|off ]\n"
> > +	  "		[ advertise %x ]\n"
> > +	  "		[ tx-lpi on|off ]\n"
> > +	  "		[ tx-timer %d ]\n"},
> [...]
> 
> Another thing: the long options are generally --foo to set/change foo
> settings and --show-foo to get/show foo settings.  Please rename these
> options to match.

Hmm, we already have a lot of --set-foo options, so never mind that.
But do change --get-eee to --show-eee.

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply

* [PATCH ethtool] Make link mode listing in dump_link_caps() a data-driven loop
From: Ben Hutchings @ 2012-06-08 18:25 UTC (permalink / raw)
  To: netdev

I've committed the following change.

Ben.
---
This removes lots of repeated code and makes it trivial to support
additional modes.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
---
 ethtool.c |   99 +++++++++++++++++++------------------------------------------
 1 files changed, 31 insertions(+), 68 deletions(-)

diff --git a/ethtool.c b/ethtool.c
index f09a032..3576f4c 100644
--- a/ethtool.c
+++ b/ethtool.c
@@ -444,8 +444,25 @@ static void dump_supported(struct ethtool_cmd *ep)
 static void
 dump_link_caps(const char *prefix, const char *an_prefix, u32 mask)
 {
+	static const struct {
+		int same_line; /* print on same line as previous */
+		u32 value;
+		const char *name;
+	} mode_defs[] = {
+		{ 0, ADVERTISED_10baseT_Half,       "10baseT/Half" },
+		{ 1, ADVERTISED_10baseT_Full,       "10baseT/Full" },
+		{ 0, ADVERTISED_100baseT_Half,      "100baseT/Half" },
+		{ 1, ADVERTISED_100baseT_Full,      "100baseT/Full" },
+		{ 0, ADVERTISED_1000baseT_Half,     "1000baseT/Half" },
+		{ 1, ADVERTISED_1000baseT_Full,     "1000baseT/Full" },
+		{ 0, ADVERTISED_1000baseKX_Full,    "1000baseKX/Full" },
+		{ 0, ADVERTISED_2500baseX_Full,     "2500baseX/Full" },
+		{ 0, ADVERTISED_10000baseT_Full,    "10000baseT/Full" },
+		{ 0, ADVERTISED_10000baseKX4_Full,  "10000baseKX4/Full" },
+		{ 0, ADVERTISED_20000baseMLD2_Full, "20000baseMLD2/Full" },
+	};
 	int indent;
-	int did1;
+	int did1, new_line_pend, i;
 
 	/* Indent just like the separate functions used to */
 	indent = strlen(prefix) + 14;
@@ -455,73 +472,19 @@ dump_link_caps(const char *prefix, const char *an_prefix, u32 mask)
 	fprintf(stdout, "	%s link modes:%*s", prefix,
 		indent - (int)strlen(prefix) - 12, "");
 	did1 = 0;
-	if (mask & ADVERTISED_10baseT_Half) {
-		did1++; fprintf(stdout, "10baseT/Half ");
-	}
-	if (mask & ADVERTISED_10baseT_Full) {
-		did1++; fprintf(stdout, "10baseT/Full ");
-	}
-	if (did1 && (mask & (ADVERTISED_100baseT_Half|ADVERTISED_100baseT_Full))) {
-		fprintf(stdout, "\n");
-		fprintf(stdout, "	%*s", indent, "");
-	}
-	if (mask & ADVERTISED_100baseT_Half) {
-		did1++; fprintf(stdout, "100baseT/Half ");
-	}
-	if (mask & ADVERTISED_100baseT_Full) {
-		did1++; fprintf(stdout, "100baseT/Full ");
-	}
-	if (did1 && (mask & (ADVERTISED_1000baseT_Half|ADVERTISED_1000baseT_Full))) {
-		fprintf(stdout, "\n");
-		fprintf(stdout, "	%*s", indent, "");
-	}
-	if (mask & ADVERTISED_1000baseT_Half) {
-		did1++; fprintf(stdout, "1000baseT/Half ");
-	}
-	if (mask & ADVERTISED_1000baseT_Full) {
-		did1++; fprintf(stdout, "1000baseT/Full ");
-	}
-	if (did1 && (mask & ADVERTISED_1000baseKX_Full)) {
-		fprintf(stdout, "\n");
-		fprintf(stdout, "	%*s", indent, "");
-	}
-	if (mask & ADVERTISED_1000baseKX_Full) {
-		did1++; fprintf(stdout, "1000baseKX/Full ");
-	}
-	if (did1 && (mask & ADVERTISED_2500baseX_Full)) {
-		fprintf(stdout, "\n");
-		fprintf(stdout, "	%*s", indent, "");
-	}
-	if (mask & ADVERTISED_2500baseX_Full) {
-		did1++; fprintf(stdout, "2500baseX/Full ");
-	}
-	if (did1 && (mask & ADVERTISED_10000baseT_Full)) {
-		fprintf(stdout, "\n");
-		fprintf(stdout, "	%*s", indent, "");
-	}
-	if (mask & ADVERTISED_10000baseT_Full) {
-		did1++; fprintf(stdout, "10000baseT/Full ");
-	}
-	if (did1 && (mask & ADVERTISED_10000baseKX4_Full)) {
-		fprintf(stdout, "\n");
-		fprintf(stdout, "	%*s", indent, "");
-	}
-	if (mask & ADVERTISED_10000baseKX4_Full) {
-		did1++; fprintf(stdout, "10000baseKX4/Full ");
-	}
-	if (did1 && (mask & ADVERTISED_20000baseMLD2_Full)) {
-		fprintf(stdout, "\n");
-		fprintf(stdout, "	%*s", indent, "");
-	}
-	if (mask & ADVERTISED_20000baseMLD2_Full) {
-		did1++; fprintf(stdout, "20000baseMLD2/Full ");
-	}
-	if (did1 && (mask & ADVERTISED_20000baseKR2_Full)) {
-		fprintf(stdout, "\n");
-		fprintf(stdout, "	%*s", indent, "");
-	}
-	if (mask & ADVERTISED_20000baseKR2_Full) {
-		did1++; fprintf(stdout, "20000baseKR2/Full ");
+	new_line_pend = 0;
+	for (i = 0; i < ARRAY_SIZE(mode_defs); i++) {
+		if (did1 && !mode_defs[i].same_line)
+			new_line_pend = 1;
+		if (mask & mode_defs[i].value) {
+			if (new_line_pend) {
+				fprintf(stdout, "\n");
+				fprintf(stdout, "	%*s", indent, "");
+				new_line_pend = 0;
+			}
+			did1++;
+			fprintf(stdout, "%s ", mode_defs[i].name);
+		}
 	}
 	if (did1 == 0)
 		 fprintf(stdout, "Not reported");
-- 
1.7.7.6


-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.

^ permalink raw reply related

* formal way to map from pf to vfs when using VT-d?
From: Chris Friesen @ 2012-06-08 20:14 UTC (permalink / raw)
  To: netdev, e1000-devel@lists.sourceforge.net, Kirsher, Jeffrey T,
	Brandeburg, Jesse, Allan, Bruce W

Hi,

I'm using the igb/igbvf drivers.  If I set igb.max_vfs=7,7,7,7 it 
creates 7 vfs for each of my pfs.  So far so good.

Now, how do I map from a given pf to the PCI addresses for the set of 
vfs that are associated with it?  I don't have virsh installed, so is 
there a way to query this with sysfs or something?

Thanks,
Chris

-- 

Chris Friesen
Software Designer

3500 Carling Avenue
Ottawa, Ontario K2H 8E9
www.genband.com

^ permalink raw reply

* Re: [PATCH net-next] ipv4: Add interface option to enable routing of 127.0.0.0/8
From: David Miller @ 2012-06-08 20:23 UTC (permalink / raw)
  To: tgraf; +Cc: netdev
In-Reply-To: <20120608101859.GH32152@canuck.infradead.org>


What's the different between this patch and the one you posted
half a day ago?

^ permalink raw reply

* Re: formal way to map from pf to vfs when using VT-d?
From: Chris Friesen @ 2012-06-08 20:41 UTC (permalink / raw)
  To: netdev, e1000-devel@lists.sourceforge.net, Kirsher, Jeffrey T,
	Brandeburg, Jesse, Allan, Bruce W
In-Reply-To: <4FD25D18.6090002@genband.com>

On 06/08/2012 02:14 PM, Chris Friesen wrote:
>
> Hi,
>
> I'm using the igb/igbvf drivers.  If I set igb.max_vfs=7,7,7,7 it 
> creates 7 vfs for each of my pfs.  So far so good.
>
> Now, how do I map from a given pf to the PCI addresses for the set of 
> vfs that are associated with it?  I don't have virsh installed, so is 
> there a way to query this with sysfs or something?
>

I think I found it.../sys/class/net/eth0/device/ has "virtfnX" entries 
which are symlinks to the appropriate pci address.

Chris

^ permalink raw reply

* Re: [PATCH net-next v3 1/2] inetpeer: add namespace support for inetpeer
From: David Miller @ 2012-06-08 21:27 UTC (permalink / raw)
  To: gaofeng; +Cc: eric.dumazet, steffen.klassert, netdev, containers
In-Reply-To: <1339137683-19217-1-git-send-email-gaofeng@cn.fujitsu.com>

From: Gao feng <gaofeng@cn.fujitsu.com>
Date: Fri,  8 Jun 2012 14:41:23 +0800

> now inetpeer doesn't support namespace,the information will
> be leaking across namespace.
> 
> this patch move the global vars v4_peers and v6_peers to
> netns_ipv4 and netns_ipv6 as a field peers.
> 
> add struct pernet_operations inetpeer_ops to initial pernet
> inetpeer data.
> 
> and change family_to_base and inet_getpeer to support namespace.
> 
> Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next v3 2/2] inetpeer: add parameter net for inet_getpeer_v4,v6
From: David Miller @ 2012-06-08 21:27 UTC (permalink / raw)
  To: gaofeng-BthXqXjhjHXQFUHtdCDX3A
  Cc: steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w
In-Reply-To: <1339137779-19340-1-git-send-email-gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>

From: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
Date: Fri,  8 Jun 2012 14:42:59 +0800

> add struct net as a parameter of inet_getpeer_v[4,6],
> use net to replace &init_net.
> 
> and modify some places to provide net for inet_getpeer_v[4,6]
> 
> Signed-off-by: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>

Applied.

^ permalink raw reply

* Re: [PATCH net-next] af_unix: speedup /proc/net/unix
From: David Miller @ 2012-06-08 21:28 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, swhiteho, xemul
In-Reply-To: <1339167801.6001.111.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 08 Jun 2012 17:03:21 +0200

> From: Eric Dumazet <edumazet@google.com>
> 
> /proc/net/unix has quadratic behavior, and can hold unix_table_lock for
> a while if high number of unix sockets are alive. (90 ms for 200k
> sockets...)
> 
> We already have a hash table, so its quite easy to use it.
> 
> Problem is unbound sockets are still hashed in a single hash slot
> (unix_socket_table[UNIX_HASH_TABLE])
> 
> This patch also spreads unbound sockets to 256 hash slots, to speedup
> both /proc/net/unix and unix_diag.
> 
> Time to read /proc/net/unix with 200k unix sockets :
> (time dd if=/proc/net/unix of=/dev/null bs=4k)
> 
> before : 520 secs
> after : 2 secs
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Nice work Eric, applied.

^ permalink raw reply

* Re: [PATCH] l2tp: fix a race in l2tp_ip_sendmsg()
From: David Miller @ 2012-06-08 21:31 UTC (permalink / raw)
  To: eric.dumazet; +Cc: netdev, jchapman, denys
In-Reply-To: <1339172700.6001.128.camel@edumazet-glaptop>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 08 Jun 2012 18:25:00 +0200

> From: Eric Dumazet <edumazet@google.com>
> 
> Commit 081b1b1bb27f (l2tp: fix l2tp_ip_sendmsg() route handling) added
> a race, in case IP route cache is disabled.
> 
> In this case, we should not do the dst_release(&rt->dst), since it'll
> free the dst immediately, instead of waiting a RCU grace period.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Working with a zero-ref'd object is always playing with fire even
when we "know what we are doing" :-)

Applied and queued up for -stable, thanks Eric.

^ permalink raw reply

* Re: [PATCH] be2net: fix a race in be_xmit()
From: David Miller @ 2012-06-08 21:44 UTC (permalink / raw)
  To: Sathya.Perla; +Cc: eric.dumazet, netdev
In-Reply-To: <3367B80B08154D42A3B2BC708B5D41F647C6A0B900@EXMAIL.ad.emulex.com>

From: <Sathya.Perla@Emulex.Com>
Date: Fri, 8 Jun 2012 03:06:14 -0700

> 
>>-----Original Message-----
>>From: Eric Dumazet <edumazet@google.com>
>>
>>As soon as hardware is notified of a transmit, we no longer can assume
>>skb can be dereferenced, as TX completion might have freed the packet.
>>
>>Signed-off-by: Eric Dumazet <edumazet@google.com>
>>Cc: Sathya Perla <sathya.perla@emulex.com>
> 
> Good catch. Thanks!
> Acked-by: Sathya Perla <sathya.perla@emulex.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next] ipv4: Add interface option to enable routing of 127.0.0.0/8
From: Thomas Graf @ 2012-06-08 22:22 UTC (permalink / raw)
  To: David Miller; +Cc: tgraf, netdev
In-Reply-To: <20120608.132338.892251811480644242.davem@davemloft.net>

On Fri, Jun 08, 2012 at 01:23:38PM -0700, David Miller wrote:
> 
> What's the different between this patch and the one you posted
> half a day ago?

There is no difference. I didn't see my own mail appear in
my netdev folder and assumed git send-email had failed so
I did send it again. Sorry for the double post.

^ permalink raw reply

* [PATCH] net/core: fix kernel-doc warnings
From: Randy Dunlap @ 2012-06-09  0:01 UTC (permalink / raw)
  To: netdev, David Miller

From: Randy Dunlap <rdunlap@xenotime.net>

Fix kernel-doc warnings in net/core:

Warning(net/core/skbuff.c:3368): No description found for parameter 'delta_truesize'
Warning(net/core/filter.c:628): No description found for parameter 'pfp'
Warning(net/core/filter.c:628): Excess function parameter 'sk' description in 'sk_unattached_filter_create'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
---
 net/core/filter.c |    4 ++--
 net/core/skbuff.c |    2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

--- lnx-35-rc1.orig/net/core/filter.c
+++ lnx-35-rc1/net/core/filter.c
@@ -616,9 +616,9 @@ static int __sk_prepare_filter(struct sk
 /**
  *	sk_unattached_filter_create - create an unattached filter
  *	@fprog: the filter program
- *	@sk: the socket to use
+ *	@pfp: the unattached filter that is created
  *
- * Create a filter independent ofr any socket. We first run some
+ * Create a filter independent of any socket. We first run some
  * sanity checks on it to make sure it does not explode on us later.
  * If an error occurs or there is insufficient memory for the filter
  * a negative errno code is returned. On success the return is zero.
--- lnx-35-rc1.orig/net/core/skbuff.c
+++ lnx-35-rc1/net/core/skbuff.c
@@ -3361,7 +3361,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
  * @to: prior buffer
  * @from: buffer to add
  * @fragstolen: pointer to boolean
- *
+ * @delta_truesize: how much more was allocated than was requested
  */
 bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 		      bool *fragstolen, int *delta_truesize)

^ permalink raw reply

* Re: formal way to map from pf to vfs when using VT-d?
From: Greg Rose @ 2012-06-09  0:04 UTC (permalink / raw)
  To: Chris Friesen
  Cc: netdev, e1000-devel@lists.sourceforge.net, Kirsher, Jeffrey T,
	Brandeburg, Jesse, Allan, Bruce W
In-Reply-To: <4FD26382.3050303@genband.com>

On Fri, 8 Jun 2012 14:41:38 -0600
Chris Friesen <chris.friesen@genband.com> wrote:

> On 06/08/2012 02:14 PM, Chris Friesen wrote:
> >
> > Hi,
> >
> > I'm using the igb/igbvf drivers.  If I set igb.max_vfs=7,7,7,7 it 
> > creates 7 vfs for each of my pfs.  So far so good.
> >
> > Now, how do I map from a given pf to the PCI addresses for the set
> > of vfs that are associated with it?  I don't have virsh installed,
> > so is there a way to query this with sysfs or something?
> >
> 
> I think I found it.../sys/class/net/eth0/device/ has "virtfnX"
> entries which are symlinks to the appropriate pci address.

Here's a script used by our validation team, inserted inline.

#!/usr/bin/perl
use strict;

if ($#ARGV <1)
{
        print " Usage: ./list_vfs.pl Nic_name (t for Twinville, n for niantic, k for kawela, p for powerville) port_numer( 1 for first port, 2 for second port..) \n";

        exit ;
}
my $nic_name=$ARGV[0];
#my $port_n=$ARGV[1]%4;
my $all_vfs;
my $vf_str;
my $mode = 2;
my $my_pf_str;

if ($nic_name eq 't') {
        $my_pf_str= "\"10 Gigabit\"";
	$vf_str="\"Intel Corporation Device 1515\"";
} elsif ($nic_name eq 'n') {
	$my_pf_str="\"82599EB\"";
	$vf_str="\"82599 Ethernet Controller Virtual\"";
} elsif ($nic_name eq 'k') {
	$my_pf_str="\"82576 Gigabit Network Connection\"";
	$vf_str="\"Intel Corporation 82576 Virtual Function\"";
} elsif ($nic_name eq 'p') {
	$my_pf_str= "\"I350\"";
	$vf_str="\"Intel Corporation Device 1520\"";
	$mode=4;
} else {
	print "No such virtual Function device, please check again\n";
}

my $port_n=$ARGV[1]%$mode;

my $my_pf=qx(lspci | grep $my_pf_str | awk '{print \$1}' | awk 'NR%$mode==$port_n');
print  "PF=$my_pf\n";
my $all_vfs=qx(lspci | grep $vf_str | awk '{print \$1}' | awk 'NR%$mode==$port_n');
print "VFs are:\n";
print "$all_vfs\n";

- Greg

> 
> Chris
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH] netdev: fix drivers/net/phy/ kernel-doc warnings
From: Randy Dunlap @ 2012-06-09  0:07 UTC (permalink / raw)
  To: netdev, David Miller

From: Randy Dunlap <rdunlap@xenotime.net>

Fix kernel-doc warnings in drivers/net/phy:

Warning(drivers/net/phy/mdio_bus.c:109): No description found for parameter 'mdio_bus_np'
Warning(drivers/net/phy/mdio_bus.c:109): Excess function parameter 'mdio_np' description in 'of_mdio_find_bus'

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
---
 drivers/net/phy/mdio_bus.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- lnx-35-rc1.orig/drivers/net/phy/mdio_bus.c
+++ lnx-35-rc1/drivers/net/phy/mdio_bus.c
@@ -96,7 +96,7 @@ static int of_mdio_bus_match(struct devi
 }
 /**
  * of_mdio_find_bus - Given an mii_bus node, find the mii_bus.
- * @mdio_np: Pointer to the mii_bus.
+ * @mdio_bus_np: Pointer to the mii_bus.
  *
  * Returns a pointer to the mii_bus, or NULL if none found.
  *

^ permalink raw reply

* solar lala shared photos with you
From: solar lala @ 2012-06-09  0:52 UTC (permalink / raw)
  To: netdev

[-- Attachment #1: Type: text/plain, Size: 326 bytes --]

dear sir

     We send an updated list (enclosed )of available Solar automatic control  
insecticidal lamps , Please contact us for more information.



Regards

    lala

Ecosol PV Tech Co., Ltd
Tel: 86-769-8279 2468
Fax: 86-769-879 2478
email:info@ecsolsolar.com
skype:solarlala
msn:solarlala@hotmail.com
www.ecsolsolar.com

[-- Attachment #2: 太阳能杀虫灯002.jpg --]
[-- Type: image/jpeg, Size: 144424 bytes --]

[-- Attachment #3: laptop charger.jpg --]
[-- Type: image/jpeg, Size: 10232 bytes --]

[-- Attachment #4: 太阳能手提箱002.jpg --]
[-- Type: image/jpeg, Size: 42070 bytes --]

[-- Attachment #5: 太阳能水泵.jpg --]
[-- Type: image/jpeg, Size: 48979 bytes --]

^ permalink raw reply

* [PATCH] inetpeer: fix build failed when IPV6 not enabled
From: Gao feng @ 2012-06-09  2:12 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	wfg-VuQAYsv1563Yd54FQh9/CA

commit c8a627ed06d6d49bf65015a2185c519335c4c83f
(inetpeer: add namespace support for inetpeer)
makes kernel bulid failed when IPV6 not enabled.

fix this by adding #if IS_ENABLED(CONFIG_IPV6)

Reported-by: Fengguang Wu <wfg-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
Signed-off-by: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
---
 net/ipv4/inetpeer.c |   18 ++++++++++++------
 1 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 1c85273..57f694e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -151,18 +151,19 @@ static int __net_init inetpeer_net_init(struct net *net)
 	net->ipv4.peers->root = peer_avl_empty_rcu;
 	seqlock_init(&net->ipv4.peers->lock);
 
+#if IS_ENABLED(CONFIG_IPV6)
 	net->ipv6.peers = kzalloc(sizeof(struct inet_peer_base),
 				  GFP_KERNEL);
-	if (net->ipv6.peers == NULL)
-		goto out_ipv6;
+	if (net->ipv6.peers == NULL) {
+		kfree(net->ipv4.peers);
+		return -ENOMEM;
+	}
 
 	net->ipv6.peers->root = peer_avl_empty_rcu;
 	seqlock_init(&net->ipv6.peers->lock);
+#endif
 
 	return 0;
-out_ipv6:
-	kfree(net->ipv4.peers);
-	return -ENOMEM;
 }
 
 static void __net_exit inetpeer_net_exit(struct net *net)
@@ -170,10 +171,11 @@ static void __net_exit inetpeer_net_exit(struct net *net)
 	inetpeer_invalidate_tree(net, AF_INET);
 	kfree(net->ipv4.peers);
 	net->ipv4.peers = NULL;
-
+#if IS_ENABLED(CONFIG_IPV6)
 	inetpeer_invalidate_tree(net, AF_INET6);
 	kfree(net->ipv6.peers);
 	net->ipv6.peers = NULL;
+#endif
 }
 
 static struct pernet_operations inetpeer_ops = {
@@ -433,7 +435,11 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
 static struct inet_peer_base *family_to_base(struct net *net,
 					     int family)
 {
+#if IS_ENABLED(CONFIG_IPV6)
 	return family == AF_INET ? net->ipv4.peers : net->ipv6.peers;
+#else
+	return net->ipv4.peers;
+#endif
 }
 
 /* perform garbage collect on all items stacked during a lookup */
-- 
1.7.7.6

^ permalink raw reply related

* Re: [PATCH] netdev: mv643xx_eth: Prevent build on PPC32
From: Mark Brown @ 2012-06-09  4:01 UTC (permalink / raw)
  To: Josh Boyer
  Cc: Ben Hutchings, Lennert Buytenhek, Andrew Lunn, Olof Johansson,
	netdev
In-Reply-To: <20120608010403.GL7683@zod.bos.redhat.com>

[-- Attachment #1: Type: text/plain, Size: 2013 bytes --]

On Thu, Jun 07, 2012 at 09:04:03PM -0400, Josh Boyer wrote:

> I'm not placing blame.  I'm declaring people should be cautious going
> forward.  5 arches have the clock API.  21 don't.  Whatever reasons
> there are for that, I don't care.  It should be a big warning sign.

My point here is that it's a warning sign for the API, not really for
the drivers that use it.

> It might even be beneficial to put some Kconfig dependencies on both
> CONFIG_COMMON_CLK (which is somewhat misleadingly named) and
> CONFIG_CLKDEV_LOOKUP so those are only selectable on those 5 arches.
> Something like:

>  config CLKDEV_LOOKUP
>  	bool
> +	depends on (ARM || SUPERH || MIPS || C6X || BLACKFIN)
>  	select HAVE_CLK

This is a really bad approach.  It's sending totally the wrong message
about where we want to be (we want to have the clock API available
everywhere) and more importantly it still means that drivers need to go
on carrying around ifdefery or unhelpful dependencies which is just lots
of pointless work.  A very large proportion of the drivers that use
clocks are just making sure clocks are enabled when the device is active
to integrate with system wide power optimisation and don't actually care
if there are clocks there at all, we should be making their life as easy
as possible.

A much better approach is get the stubs mentioned earlier merged
(they're already on their way) faster.  That way there are no compile
time dependencies and the problem goes away unless the driver is doing
something more active with clocks like managing the clock rate.

In the case of CLKDEV_LOOKUP the symbol should only be selected by an
architecture anyway, it's a layer on top of the architecture clock code.

> Regardless, hopefully things like this will get hit in linux-next in the
> future.  I believe the only reason that it wasn't this time is that
> none of the PPC defconfigs build in linux-next bother to build the
> driver at all.

They do generally, people do randconfig and allXconfig builds all the
time.

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [PATCH torvalds/linux.git] Make linux/tcp.h C++ friendly (trivial)
From: Paul Pluzhnikov @ 2012-06-09  4:14 UTC (permalink / raw)
  To: Jiri Kosina; +Cc: netdev
In-Reply-To: <CALoOobMSXp4x_+En2gyHmpXcxq=sECQvMZN1LX_j3Yve7Ha6Aw@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 776 bytes --]

On Tue, Mar 6, 2012 at 4:14 PM, Paul Pluzhnikov <ppluzhnikov@google.com> wrote:
> Ping?
>
> I am attaching the patch again so it doesn't get mangled by quoting
> and is easier to apply.

Ping? Ping?

I thought the patch has been applied, and stopped pinging it.
But it appears that it never did get applied :-(

Thanks,

On Thu, Dec 29, 2011 at 10:30 AM, Paul Pluzhnikov
<ppluzhnikov@google.com> wrote:
>
> Using linux/tcp.h from C++ results in:
>
> cat t.cc
>
> #include <linux/tcp.h>
> int main() { }
>
> g++ -c t.cc
>
> In file included from t.cc:1:
> /usr/include/linux/tcp.h:72: error: '__u32 __fswab32(__u32)' cannot appear in a constant-expression
> /usr/include/linux/tcp.h:72: error: a function call cannot appear in a constant-expression
> ...


-- 
Paul Pluzhnikov

[-- Attachment #2: linux-tcp_h-patch-20111229.txt --]
[-- Type: text/plain, Size: 1255 bytes --]

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7f59ee9..63334f7 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -69,16 +69,16 @@ union tcp_word_hdr {
 #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) 
 
 enum { 
-	TCP_FLAG_CWR = __cpu_to_be32(0x00800000),
-	TCP_FLAG_ECE = __cpu_to_be32(0x00400000),
-	TCP_FLAG_URG = __cpu_to_be32(0x00200000),
-	TCP_FLAG_ACK = __cpu_to_be32(0x00100000),
-	TCP_FLAG_PSH = __cpu_to_be32(0x00080000),
-	TCP_FLAG_RST = __cpu_to_be32(0x00040000),
-	TCP_FLAG_SYN = __cpu_to_be32(0x00020000),
-	TCP_FLAG_FIN = __cpu_to_be32(0x00010000),
-	TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000),
-	TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000)
+	TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000),
+	TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000),
+	TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000),
+	TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000),
+	TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000),
+	TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000),
+	TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000),
+	TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000),
+	TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000),
+	TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000)
 }; 
 
 /*

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox