Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next] ixgbe: Remove unnecessary #ifdef CONFIG_DEBUG_FS tests
From: Joe Perches @ 2013-04-08  1:27 UTC (permalink / raw)
  To: Jeff Kirsher; +Cc: e1000-devel, netdev

Add some empty static inlines instead to make
the code more readable.

Signed-off-by: Joe Perches <joe@perches.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h      |  5 +++++
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 10 ----------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index a8e10cf..ca93238 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -740,6 +740,11 @@ extern void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter);
 extern void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter);
 extern void ixgbe_dbg_init(void);
 extern void ixgbe_dbg_exit(void);
+#else
+static inline void ixgbe_dbg_adapter_init(struct ixgbe_adapter *adapter) {}
+static inline void ixgbe_dbg_adapter_exit(struct ixgbe_adapter *adapter) {}
+static inline void ixgbe_dbg_init(void) {}
+static inline void ixgbe_dbg_exit(void) {}
 #endif /* CONFIG_DEBUG_FS */
 static inline struct netdev_queue *txring_txq(const struct ixgbe_ring *ring)
 {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 1339932..06cd8cd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7575,9 +7575,7 @@ skip_sriov:
 		e_err(probe, "failed to allocate sysfs resources\n");
 #endif /* CONFIG_IXGBE_HWMON */
 
-#ifdef CONFIG_DEBUG_FS
 	ixgbe_dbg_adapter_init(adapter);
-#endif /* CONFIG_DEBUG_FS */
 
 	return 0;
 
@@ -7613,9 +7611,7 @@ static void ixgbe_remove(struct pci_dev *pdev)
 	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);
 	struct net_device *netdev = adapter->netdev;
 
-#ifdef CONFIG_DEBUG_FS
 	ixgbe_dbg_adapter_exit(adapter);
-#endif /*CONFIG_DEBUG_FS */
 
 	set_bit(__IXGBE_DOWN, &adapter->state);
 	cancel_work_sync(&adapter->service_task);
@@ -7878,15 +7874,11 @@ static int __init ixgbe_init_module(void)
 	pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version);
 	pr_info("%s\n", ixgbe_copyright);
 
-#ifdef CONFIG_DEBUG_FS
 	ixgbe_dbg_init();
-#endif /* CONFIG_DEBUG_FS */
 
 	ret = pci_register_driver(&ixgbe_driver);
 	if (ret) {
-#ifdef CONFIG_DEBUG_FS
 		ixgbe_dbg_exit();
-#endif /* CONFIG_DEBUG_FS */
 		return ret;
 	}
 
@@ -7912,9 +7904,7 @@ static void __exit ixgbe_exit_module(void)
 #endif
 	pci_unregister_driver(&ixgbe_driver);
 
-#ifdef CONFIG_DEBUG_FS
 	ixgbe_dbg_exit();
-#endif /* CONFIG_DEBUG_FS */
 
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 }

^ permalink raw reply related

* Re: [Patch net-next] tcp: add a global sysctl to control TCP delayed ack
From: Cong Wang @ 2013-04-08  1:45 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, eric.dumazet, rick.jones2, shemminger, tgraf,
	David.Laight
In-Reply-To: <20130407.170958.338519719729552895.davem@davemloft.net>

On Sun, 2013-04-07 at 17:09 -0400, David Miller wrote:
> 
> I'm not applying a patch that adds a global parameter for
> an attribute which has per-path scope.

Ok, I will make it per-route.

Thanks.

^ permalink raw reply

* [Patch net-next v3 1/4] vxlan: defer vxlan init as late as possible
From: Cong Wang @ 2013-04-08  2:18 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, David S. Miller, Cong Wang

From: Cong Wang <amwang@redhat.com>

When vxlan is compiled as a builtin, its init code runs
before the IPv6 init code. This could cause problems when
we create an IPv6 socket in a later patch.

Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
 drivers/net/vxlan.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 62a4438..cac4e4f 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1619,7 +1619,7 @@ out2:
 out1:
 	return rc;
 }
-module_init(vxlan_init_module);
+late_initcall(vxlan_init_module);

 static void __exit vxlan_cleanup_module(void)
 {
-- 
1.7.7.6

^ permalink raw reply related

* [Patch net-next v3 2/4] ipv6: export ipv6_sock_mc_join and ipv6_sock_mc_drop
From: Cong Wang @ 2013-04-08  2:18 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, David S. Miller, Cong Wang
In-Reply-To: <1365387536-25217-1-git-send-email-amwang@redhat.com>

From: Cong Wang <amwang@redhat.com>

They will be used by the vxlan module.

Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
 net/ipv6/mcast.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bfa6cc3..d03426d 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -200,6 +200,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
 	return 0;
 }
+EXPORT_SYMBOL(ipv6_sock_mc_join);
 
 /*
  *	socket leave on multicast group
@@ -246,6 +247,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 
 	return -EADDRNOTAVAIL;
 }
+EXPORT_SYMBOL(ipv6_sock_mc_drop);
 
 /* called with rcu_read_lock() */
 static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
-- 
1.7.7.6

^ permalink raw reply related

* [Patch net-next v3 3/4] vxlan: add ipv6 support
From: Cong Wang @ 2013-04-08  2:18 UTC (permalink / raw)
  To: netdev; +Cc: David Stevens, Stephen Hemminger, David S. Miller, Cong Wang
In-Reply-To: <1365387536-25217-1-git-send-email-amwang@redhat.com>

From: Cong Wang <amwang@redhat.com>

v3: fix many coding style issues
    fix some ugly #ifdef
    rename vxlan_ip to vxlan_addr
    rename ->proto to ->family
    rename ->ip4/->ip6 to ->sin/->sin6

v2: fix some compile errors when !CONFIG_IPV6
    improve some code based on Stephen's comments
    use sockaddr as suggested by David

This patch adds IPv6 support to the vxlan device, as the new version
of the RFC draft already mentions it:

   http://tools.ietf.org/html/draft-mahalingam-dutt-dcops-vxlan-03

Cc: David Stevens <dlstevens@us.ibm.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
 drivers/net/vxlan.c          |  544 ++++++++++++++++++++++++++++++++----------
 include/uapi/linux/if_link.h |    2 +
 2 files changed, 425 insertions(+), 121 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index cac4e4f..5fbf0ed 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -9,7 +9,6 @@
  *
  * TODO
  *  - use IANA UDP port number (when defined)
- *  - IPv6 (not in RFC)
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -42,6 +41,11 @@
 #include <net/inet_ecn.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/ip6_tunnel.h>
+#endif
 
 #define VXLAN_VERSION	"0.1"
 
@@ -56,6 +60,7 @@
 #define VXLAN_VID_MASK	(VXLAN_N_VID - 1)
 /* IP header + UDP + VXLAN + Ethernet header */
 #define VXLAN_HEADROOM (20 + 8 + 8 + 14)
+#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
 
 #define VXLAN_FLAGS 0x08000000	/* struct vxlanhdr.vx_flags required value. */
 
@@ -81,9 +86,20 @@ struct vxlan_net {
 	struct hlist_head vni_list[VNI_HASH_SIZE];
 };
 
+struct vxlan_addr {
+	union {
+		struct sockaddr_in	sin;
+		struct sockaddr_in6	sin6;
+		struct sockaddr		sa;
+	} u;
+#define sin u.sin.sin_addr.s_addr
+#define sin6 u.sin6.sin6_addr
+#define family u.sa.sa_family
+};
+
 struct vxlan_rdst {
 	struct rcu_head		 rcu;
-	__be32			 remote_ip;
+	struct vxlan_addr	 remote_ip;
 	__be16			 remote_port;
 	u32			 remote_vni;
 	u32			 remote_ifindex;
@@ -106,8 +122,8 @@ struct vxlan_dev {
 	struct hlist_node hlist;
 	struct net_device *dev;
 	__u32		  vni;		/* virtual network id */
-	__be32	          gaddr;	/* multicast group */
-	__be32		  saddr;	/* source address */
+	struct vxlan_addr gaddr;	/* multicast group */
+	struct vxlan_addr saddr;	/* source address */
 	unsigned int      link;		/* link to multicast over */
 	__u16		  port_min;	/* source port range */
 	__u16		  port_max;
@@ -130,6 +146,59 @@ struct vxlan_dev {
 #define VXLAN_F_L2MISS	0x08
 #define VXLAN_F_L3MISS	0x10
 
+static inline
+bool vxlan_addr_equal(const struct vxlan_addr *a, const struct vxlan_addr *b)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (a->family != b->family)
+		return false;
+	if (a->family == AF_INET6)
+		return ipv6_addr_equal(&a->sin6, &b->sin6);
+	else
+#endif
+		return a->sin == b->sin;
+}
+
+static inline bool vxlan_addr_any(const struct vxlan_addr *ipa)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (ipa->family == AF_INET6)
+		return ipv6_addr_any(&ipa->sin6);
+	else
+#endif
+		return ipa->sin == htonl(INADDR_ANY);
+}
+
+static int vxlan_nla_get_addr(struct vxlan_addr *ip, struct nlattr *nla)
+{
+	if (nla_len(nla) == sizeof(struct in6_addr)) {
+#if IS_ENABLED(CONFIG_IPV6)
+		nla_memcpy(&ip->sin6, nla, sizeof(struct in6_addr));
+		ip->family = AF_INET6;
+		return 0;
+#else
+		return -EAFNOSUPPORT;
+#endif
+	} else if (nla_len(nla) == sizeof(__be32)) {
+		ip->sin = nla_get_be32(nla);
+		ip->family = AF_INET;
+		return 0;
+	} else {
+		return -EAFNOSUPPORT;
+	}
+}
+
+static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
+			      const struct vxlan_addr *ip)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	if (ip->family == AF_INET6)
+		return nla_put(skb, attr, sizeof(struct in6_addr), &ip->sin6);
+	else
+#endif
+		return nla_put_be32(skb, attr, ip->sin);
+}
+
 /* salt for hash table */
 static u32 vxlan_salt __read_mostly;
 
@@ -176,7 +245,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 
 	if (type == RTM_GETNEIGH) {
 		ndm->ndm_family	= AF_INET;
-		send_ip = rdst->remote_ip != htonl(INADDR_ANY);
+		send_ip = !vxlan_addr_any(&rdst->remote_ip);
 		send_eth = !is_zero_ether_addr(fdb->eth_addr);
 	} else
 		ndm->ndm_family	= AF_BRIDGE;
@@ -188,7 +257,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
 		goto nla_put_failure;
 
-	if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip))
+	if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip))
 		goto nla_put_failure;
 
 	if (rdst->remote_port && rdst->remote_port != vxlan_port &&
@@ -220,7 +289,7 @@ static inline size_t vxlan_nlmsg_size(void)
 {
 	return NLMSG_ALIGN(sizeof(struct ndmsg))
 		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
-		+ nla_total_size(sizeof(__be32)) /* NDA_DST */
+		+ nla_total_size(sizeof(struct in6_addr)) /* NDA_DST */
 		+ nla_total_size(sizeof(__be32)) /* NDA_PORT */
 		+ nla_total_size(sizeof(__be32)) /* NDA_VNI */
 		+ nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
@@ -253,14 +322,14 @@ errout:
 		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
 }
 
-static void vxlan_ip_miss(struct net_device *dev, __be32 ipa)
+static void vxlan_ip_miss(struct net_device *dev, struct vxlan_addr *ipa)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_fdb f;
 
 	memset(&f, 0, sizeof f);
 	f.state = NUD_STALE;
-	f.remote.remote_ip = ipa; /* goes to NDA_DST */
+	f.remote.remote_ip = *ipa; /* goes to NDA_DST */
 	f.remote.remote_vni = VXLAN_N_VID;
 
 	vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
@@ -315,14 +384,14 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
 }
 
 /* Add/update destinations for multicast */
-static int vxlan_fdb_append(struct vxlan_fdb *f,
-			    __be32 ip, __u32 port, __u32 vni, __u32 ifindex)
+static int vxlan_fdb_append(struct vxlan_fdb *f, struct vxlan_addr *ip,
+			    __u32 port, __u32 vni, __u32 ifindex)
 {
 	struct vxlan_rdst *rd_prev, *rd;
 
 	rd_prev = NULL;
 	for (rd = &f->remote; rd; rd = rd->remote_next) {
-		if (rd->remote_ip == ip &&
+		if (vxlan_addr_equal(&rd->remote_ip, ip) &&
 		    rd->remote_port == port &&
 		    rd->remote_vni == vni &&
 		    rd->remote_ifindex == ifindex)
@@ -332,7 +401,7 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
 	rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
 	if (rd == NULL)
 		return -ENOBUFS;
-	rd->remote_ip = ip;
+	rd->remote_ip = *ip;
 	rd->remote_port = port;
 	rd->remote_vni = vni;
 	rd->remote_ifindex = ifindex;
@@ -343,7 +412,7 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
 
 /* Add new entry to forwarding table -- assumes lock held */
 static int vxlan_fdb_create(struct vxlan_dev *vxlan,
-			    const u8 *mac, __be32 ip,
+			    const u8 *mac, struct vxlan_addr *ip,
 			    __u16 state, __u16 flags,
 			    __u32 port, __u32 vni, __u32 ifindex)
 {
@@ -383,7 +452,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 			return -ENOMEM;
 
 		notify = 1;
-		f->remote.remote_ip = ip;
+		f->remote.remote_ip = *ip;
 		f->remote.remote_port = port;
 		f->remote.remote_vni = vni;
 		f->remote.remote_ifindex = ifindex;
@@ -435,7 +504,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct net *net = dev_net(vxlan->dev);
-	__be32 ip;
+	struct vxlan_addr ip;
 	u32 port, vni, ifindex;
 	int err;
 
@@ -448,10 +517,9 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	if (tb[NDA_DST] == NULL)
 		return -EINVAL;
 
-	if (nla_len(tb[NDA_DST]) != sizeof(__be32))
-		return -EAFNOSUPPORT;
-
-	ip = nla_get_be32(tb[NDA_DST]);
+	err = vxlan_nla_get_addr(&ip, tb[NDA_DST]);
+	if (err)
+		return err;
 
 	if (tb[NDA_PORT]) {
 		if (nla_len(tb[NDA_PORT]) != sizeof(u32))
@@ -481,7 +549,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		ifindex = 0;
 
 	spin_lock_bh(&vxlan->hash_lock);
-	err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, port,
+	err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags, port,
 		vni, ifindex);
 	spin_unlock_bh(&vxlan->hash_lock);
 
@@ -545,7 +613,7 @@ skip:
  * and Tunnel endpoint.
  */
 static void vxlan_snoop(struct net_device *dev,
-			__be32 src_ip, const u8 *src_mac)
+			struct vxlan_addr *src_ip, const u8 *src_mac)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_fdb *f;
@@ -554,7 +622,7 @@ static void vxlan_snoop(struct net_device *dev,
 	f = vxlan_find_mac(vxlan, src_mac);
 	if (likely(f)) {
 		f->used = jiffies;
-		if (likely(f->remote.remote_ip == src_ip))
+		if (likely(vxlan_addr_equal(&f->remote.remote_ip, src_ip)))
 			return;
 
 		if (net_ratelimit())
@@ -562,7 +630,7 @@ static void vxlan_snoop(struct net_device *dev,
 				    "%pM migrated from %pI4 to %pI4\n",
 				    src_mac, &f->remote.remote_ip, &src_ip);
 
-		f->remote.remote_ip = src_ip;
+		f->remote.remote_ip = *src_ip;
 		f->updated = jiffies;
 	} else {
 		/* learned new entry */
@@ -591,7 +659,7 @@ static bool vxlan_group_used(struct vxlan_net *vn,
 			if (!netif_running(vxlan->dev))
 				continue;
 
-			if (vxlan->gaddr == this->gaddr)
+			if (vxlan_addr_equal(&vxlan->gaddr, &this->gaddr))
 				return true;
 		}
 
@@ -605,7 +673,7 @@ static int vxlan_join_group(struct net_device *dev)
 	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct sock *sk = vn->sock->sk;
 	struct ip_mreqn mreq = {
-		.imr_multiaddr.s_addr	= vxlan->gaddr,
+		.imr_multiaddr.s_addr	= vxlan->gaddr.sin,
 		.imr_ifindex		= vxlan->link,
 	};
 	int err;
@@ -617,7 +685,12 @@ static int vxlan_join_group(struct net_device *dev)
 	/* Need to drop RTNL to call multicast join */
 	rtnl_unlock();
 	lock_sock(sk);
-	err = ip_mc_join_group(sk, &mreq);
+#if IS_ENABLED(CONFIG_IPV6)
+	if (vxlan->gaddr.family == AF_INET6)
+		err = ipv6_sock_mc_join(sk, vxlan->link, &vxlan->gaddr.sin6);
+	else
+#endif
+		err = ip_mc_join_group(sk, &mreq);
 	release_sock(sk);
 	rtnl_lock();
 
@@ -633,7 +706,7 @@ static int vxlan_leave_group(struct net_device *dev)
 	int err = 0;
 	struct sock *sk = vn->sock->sk;
 	struct ip_mreqn mreq = {
-		.imr_multiaddr.s_addr	= vxlan->gaddr,
+		.imr_multiaddr.s_addr	= vxlan->gaddr.sin,
 		.imr_ifindex		= vxlan->link,
 	};
 
@@ -644,7 +717,12 @@ static int vxlan_leave_group(struct net_device *dev)
 	/* Need to drop RTNL to call multicast leave */
 	rtnl_unlock();
 	lock_sock(sk);
-	err = ip_mc_leave_group(sk, &mreq);
+#if IS_ENABLED(CONFIG_IPV6)
+	if (vxlan->gaddr.family == AF_INET6)
+		err = ipv6_sock_mc_drop(sk, vxlan->link, &vxlan->gaddr.sin6);
+	else
+#endif
+		err = ip_mc_leave_group(sk, &mreq);
 	release_sock(sk);
 	rtnl_lock();
 
@@ -654,12 +732,16 @@ static int vxlan_leave_group(struct net_device *dev)
 /* Callback from net/ipv4/udp.c to receive packets */
 static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
-	struct iphdr *oip;
+	struct iphdr *oip = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct ipv6hdr *oip6 = NULL;
+#endif
 	struct vxlanhdr *vxh;
 	struct vxlan_dev *vxlan;
 	struct pcpu_tstats *stats;
+	struct vxlan_addr src_ip;
 	__u32 vni;
-	int err;
+	int err = 0;
 
 	/* pop off outer UDP header */
 	__skb_pull(skb, sizeof(struct udphdr));
@@ -696,7 +778,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 	skb_reset_mac_header(skb);
 
 	/* Re-examine inner Ethernet packet */
-	oip = ip_hdr(skb);
+	if (skb->protocol == htons(ETH_P_IP))
+		oip = ip_hdr(skb);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		oip6 = ipv6_hdr(skb);
+#endif
+
 	skb->protocol = eth_type_trans(skb, vxlan->dev);
 
 	/* Ignore packet loops (and multicast echo) */
@@ -704,8 +792,19 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 			       vxlan->dev->dev_addr) == 0)
 		goto drop;
 
-	if (vxlan->flags & VXLAN_F_LEARN)
-		vxlan_snoop(skb->dev, oip->saddr, eth_hdr(skb)->h_source);
+	if (vxlan->flags & VXLAN_F_LEARN) {
+		if (oip) {
+			src_ip.sin = oip->saddr;
+			src_ip.family = AF_INET;
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+		if (oip6) {
+			src_ip.sin6 = oip6->saddr;
+			src_ip.family = AF_INET6;
+		}
+#endif
+		vxlan_snoop(skb->dev, &src_ip, eth_hdr(skb)->h_source);
+	}
 
 	__skb_tunnel_rx(skb, vxlan->dev);
 	skb_reset_network_header(skb);
@@ -721,11 +820,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 
 	skb->encapsulation = 0;
 
-	err = IP_ECN_decapsulate(oip, skb);
+#if IS_ENABLED(CONFIG_IPV6)
+	if (oip6)
+		err = IP6_ECN_decapsulate(oip6, skb);
+#endif
+	if (oip)
+		err = IP_ECN_decapsulate(oip, skb);
+
 	if (unlikely(err)) {
-		if (log_ecn_error)
-			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
-					     &oip->saddr, oip->tos);
+		if (log_ecn_error) {
+#if IS_ENABLED(CONFIG_IPV6)
+			if (oip6)
+				net_info_ratelimited("non-ECT from %pI6\n",
+					     &oip6->saddr);
+#endif
+			if (oip)
+				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+						     &oip->saddr, oip->tos);
+		}
 		if (err > 1) {
 			++vxlan->dev->stats.rx_frame_errors;
 			++vxlan->dev->stats.rx_errors;
@@ -760,6 +872,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
 	u8 *arpptr, *sha;
 	__be32 sip, tip;
 	struct neighbour *n;
+	struct vxlan_addr ipa;
 
 	if (dev->flags & IFF_NOARP)
 		goto out;
@@ -801,7 +914,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
 		}
 
 		f = vxlan_find_mac(vxlan, n->ha);
-		if (f && f->remote.remote_ip == htonl(INADDR_ANY)) {
+		if (f && vxlan_addr_any(&f->remote.remote_ip)) {
 			/* bridge-local neighbor */
 			neigh_release(n);
 			goto out;
@@ -819,8 +932,11 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
 
 		if (netif_rx_ni(reply) == NET_RX_DROP)
 			dev->stats.rx_dropped++;
-	} else if (vxlan->flags & VXLAN_F_L3MISS)
-		vxlan_ip_miss(dev, tip);
+	} else if (vxlan->flags & VXLAN_F_L3MISS) {
+		ipa.sin = tip;
+		ipa.family = AF_INET;
+		vxlan_ip_miss(dev, &ipa);
+	}
 out:
 	consume_skb(skb);
 	return NETDEV_TX_OK;
@@ -842,6 +958,14 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
 			return false;
 		pip = ip_hdr(skb);
 		n = neigh_lookup(&arp_tbl, &pip->daddr, dev);
+		if (!n && vxlan->flags & VXLAN_F_L3MISS) {
+			struct vxlan_addr ipa;
+			ipa.sin = pip->daddr;
+			ipa.family = AF_INET;
+			vxlan_ip_miss(dev, &ipa);
+			return false;
+		}
+
 		break;
 	default:
 		return false;
@@ -858,8 +982,8 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
 		}
 		neigh_release(n);
 		return diff;
-	} else if (vxlan->flags & VXLAN_F_L3MISS)
-		vxlan_ip_miss(dev, pip->daddr);
+	}
+
 	return false;
 }
 
@@ -869,7 +993,8 @@ static void vxlan_sock_free(struct sk_buff *skb)
 }
 
 /* On transmit, associate with the tunnel socket */
-static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
+static inline void vxlan_set_owner(struct net_device *dev,
+				   struct sk_buff *skb)
 {
 	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct sock *sk = vn->sock->sk;
@@ -917,23 +1042,30 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct rtable *rt;
-	const struct iphdr *old_iph;
+	const struct iphdr *old_iph = NULL;
 	struct iphdr *iph;
 	struct vxlanhdr *vxh;
 	struct udphdr *uh;
 	struct flowi4 fl4;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct flowi6 fl6;
+	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
+	struct sock *sk = vn->sock->sk;
+	struct ipv6hdr *ip6h;
+#endif
 	unsigned int pkt_len = skb->len;
-	__be32 dst;
-	__u16 src_port, dst_port;
+	const struct vxlan_addr *dst;
+	struct dst_entry *ndst = NULL;
+	__u16 src_port = 0, dst_port;
         u32 vni;
 	__be16 df = 0;
 	__u8 tos, ttl;
 
 	dst_port = rdst->remote_port ? rdst->remote_port : vxlan_port;
 	vni = rdst->remote_vni;
-	dst = rdst->remote_ip;
+	dst = &rdst->remote_ip;
 
-	if (!dst) {
+	if (vxlan_addr_any(dst)) {
 		if (did_rsc) {
 			__skb_pull(skb, skb_network_offset(skb));
 			skb->ip_summed = CHECKSUM_NONE;
@@ -961,47 +1093,86 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 		skb->encapsulation = 1;
 	}
 
-	/* Need space for new headers (invalidates iph ptr) */
-	if (skb_cow_head(skb, VXLAN_HEADROOM))
-		goto drop;
+	ttl = vxlan->ttl;
+	tos = vxlan->tos;
+	if (dst->family == AF_INET) {
+		/* Need space for new headers (invalidates iph ptr) */
+		if (skb_cow_head(skb, VXLAN_HEADROOM))
+			goto drop;
 
-	old_iph = ip_hdr(skb);
+		old_iph = ip_hdr(skb);
+		if (!ttl && IN_MULTICAST(ntohl(dst->sin)))
+			ttl = 1;
 
-	ttl = vxlan->ttl;
-	if (!ttl && IN_MULTICAST(ntohl(dst)))
-		ttl = 1;
+		if (tos == 1)
+			tos = ip_tunnel_get_dsfield(old_iph, skb);
 
-	tos = vxlan->tos;
-	if (tos == 1)
-		tos = ip_tunnel_get_dsfield(old_iph, skb);
-
-	src_port = vxlan_src_port(vxlan, skb);
-
-	memset(&fl4, 0, sizeof(fl4));
-	fl4.flowi4_oif = rdst->remote_ifindex;
-	fl4.flowi4_tos = RT_TOS(tos);
-	fl4.daddr = dst;
-	fl4.saddr = vxlan->saddr;
-
-	rt = ip_route_output_key(dev_net(dev), &fl4);
-	if (IS_ERR(rt)) {
-		netdev_dbg(dev, "no route to %pI4\n", &dst);
-		dev->stats.tx_carrier_errors++;
-		goto tx_error;
-	}
+		src_port = vxlan_src_port(vxlan, skb);
+
+		memset(&fl4, 0, sizeof(fl4));
+		fl4.flowi4_oif = rdst->remote_ifindex;
+		fl4.flowi4_tos = RT_TOS(tos);
+		fl4.daddr = dst->sin;
+		fl4.saddr = vxlan->saddr.sin;
+
+		rt = ip_route_output_key(dev_net(dev), &fl4);
+		if (IS_ERR(rt)) {
+			netdev_dbg(dev, "no route to %pI4\n", &dst->sin);
+			dev->stats.tx_carrier_errors++;
+			goto tx_error;
+		}
+
+		if (rt->dst.dev == dev) {
+			netdev_dbg(dev, "circular route to %pI4\n", &dst->sin);
+			ip_rt_put(rt);
+			dev->stats.collisions++;
+			goto tx_error;
+		}
+		ndst = &rt->dst;
+		memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	} else {
+#if IS_ENABLED(CONFIG_IPV6)
+		const struct ipv6hdr *old_iph6;
 
-	if (rt->dst.dev == dev) {
-		netdev_dbg(dev, "circular route to %pI4\n", &dst);
-		ip_rt_put(rt);
-		dev->stats.collisions++;
-		goto tx_error;
+		/* Need space for new headers (invalidates iph ptr) */
+		if (skb_cow_head(skb, VXLAN6_HEADROOM))
+			goto drop;
+
+		old_iph6 = ipv6_hdr(skb);
+		if (!ttl && ipv6_addr_is_multicast(&dst->sin6))
+			ttl = 1;
+
+		if (tos == 1)
+			tos = ipv6_get_dsfield(old_iph6);
+
+		src_port = vxlan_src_port(vxlan, skb);
+
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_oif = vxlan->link;
+		fl6.flowi6_tos = RT_TOS(tos);
+		fl6.daddr = dst->sin6;
+		fl6.saddr = vxlan->saddr.sin6;
+		fl6.flowi6_proto = skb->protocol;
+
+		if (ip6_dst_lookup(sk, &ndst, &fl6)) {
+			netdev_dbg(dev, "no route to %pI6\n", &dst->sin6);
+			dev->stats.tx_carrier_errors++;
+			goto tx_error;
+		}
+
+		if (ndst->dev == dev) {
+			netdev_dbg(dev, "circular route to %pI6\n", &dst->sin6);
+			dst_release(ndst);
+			dev->stats.collisions++;
+			goto tx_error;
+		}
+#endif
 	}
 
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
+	skb_dst_set(skb, ndst);
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_FLAGS);
@@ -1017,27 +1188,55 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	uh->len = htons(skb->len);
 	uh->check = 0;
 
-	__skb_push(skb, sizeof(*iph));
-	skb_reset_network_header(skb);
-	iph		= ip_hdr(skb);
-	iph->version	= 4;
-	iph->ihl	= sizeof(struct iphdr) >> 2;
-	iph->frag_off	= df;
-	iph->protocol	= IPPROTO_UDP;
-	iph->tos	= ip_tunnel_ecn_encap(tos, old_iph, skb);
-	iph->daddr	= dst;
-	iph->saddr	= fl4.saddr;
-	iph->ttl	= ttl ? : ip4_dst_hoplimit(&rt->dst);
-	tunnel_ip_select_ident(skb, old_iph, &rt->dst);
-
-	nf_reset(skb);
+	if (dst->family == AF_INET) {
+		__skb_push(skb, sizeof(*iph));
+		skb_reset_network_header(skb);
+		iph		= ip_hdr(skb);
+		iph->version	= 4;
+		iph->ihl	= sizeof(struct iphdr) >> 2;
+		iph->frag_off	= df;
+		iph->protocol	= IPPROTO_UDP;
+		iph->tos	= ip_tunnel_ecn_encap(tos, old_iph, skb);
+		iph->daddr	= dst->sin;
+		iph->saddr	= fl4.saddr;
+		iph->ttl	= ttl ? : ip4_dst_hoplimit(ndst);
+		tunnel_ip_select_ident(skb, old_iph, ndst);
+	} else {
+#if IS_ENABLED(CONFIG_IPV6)
+		if (skb->ip_summed == CHECKSUM_PARTIAL) {
+			skb->csum_start = skb_transport_header(skb) - skb->head;
+			skb->csum_offset = offsetof(struct udphdr, check);
+		} else
+			uh->check = csum_ipv6_magic(&fl6.saddr, &fl6.daddr,
+						    skb->len, IPPROTO_UDP,
+						    csum_partial(uh, skb->len, 0));
+		__skb_push(skb, sizeof(*ip6h));
+		skb_reset_network_header(skb);
+		ip6h		  = ipv6_hdr(skb);
+		ip6h->version	  = 6;
+		ip6h->priority	  = 0;
+		ip6h->flow_lbl[0] = 0;
+		ip6h->flow_lbl[1] = 0;
+		ip6h->flow_lbl[2] = 0;
+		ip6h->payload_len = htons(skb->len);
+		ip6h->nexthdr     = IPPROTO_UDP;
+		ip6h->hop_limit   = ttl ? : ip6_dst_hoplimit(ndst);
+		ip6h->daddr	  = fl6.daddr;
+		ip6h->saddr	  = fl6.saddr;
+#endif
+	}
 
 	vxlan_set_owner(dev, skb);
 
 	if (handle_offloads(skb))
 		goto drop;
 
-	iptunnel_xmit(skb, dev);
+	if (dst->family == AF_INET)
+		iptunnel_xmit(skb, dev);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		ip6tunnel_xmit(skb, dev);
+#endif
 	return NETDEV_TX_OK;
 
 drop:
@@ -1084,7 +1283,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 		group.remote_next = 0;
 		rdst0 = &group;
 
-		if (group.remote_ip == htonl(INADDR_ANY) &&
+		if (vxlan_addr_any(&group.remote_ip) &&
 		    (vxlan->flags & VXLAN_F_L2MISS) &&
 		    !is_multicast_ether_addr(eth->h_dest))
 			vxlan_fdb_miss(vxlan, eth->h_dest);
@@ -1162,7 +1361,7 @@ static int vxlan_open(struct net_device *dev)
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	int err;
 
-	if (vxlan->gaddr) {
+	if (!vxlan_addr_any(&vxlan->gaddr)) {
 		err = vxlan_join_group(dev);
 		if (err)
 			return err;
@@ -1196,7 +1395,7 @@ static int vxlan_stop(struct net_device *dev)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 
-	if (vxlan->gaddr)
+	if (!vxlan_addr_any(&vxlan->gaddr))
 		vxlan_leave_group(dev);
 
 	del_timer_sync(&vxlan->age_timer);
@@ -1246,7 +1445,10 @@ static void vxlan_setup(struct net_device *dev)
 
 	eth_hw_addr_random(dev);
 	ether_setup(dev);
-	dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM;
+	if (vxlan->gaddr.family == AF_INET)
+		dev->hard_header_len = ETH_HLEN + VXLAN_HEADROOM;
+	else
+		dev->hard_header_len = ETH_HLEN + VXLAN6_HEADROOM;
 
 	dev->netdev_ops = &vxlan_netdev_ops;
 	dev->destructor = vxlan_free;
@@ -1283,8 +1485,10 @@ static void vxlan_setup(struct net_device *dev)
 static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
 	[IFLA_VXLAN_GROUP]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+	[IFLA_VXLAN_GROUP6]	= { .len = sizeof(struct in6_addr) },
 	[IFLA_VXLAN_LINK]	= { .type = NLA_U32 },
 	[IFLA_VXLAN_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
+	[IFLA_VXLAN_LOCAL6]	= { .len = sizeof(struct in6_addr) },
 	[IFLA_VXLAN_TOS]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_TTL]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_LEARNING]	= { .type = NLA_U8 },
@@ -1326,6 +1530,17 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
 			pr_debug("group address is not IPv4 multicast\n");
 			return -EADDRNOTAVAIL;
 		}
+	} else if (data[IFLA_VXLAN_GROUP6]) {
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr gaddr;
+		nla_memcpy(&gaddr, data[IFLA_VXLAN_GROUP6], sizeof(gaddr));
+		if (!ipv6_addr_is_multicast(&gaddr)) {
+			pr_debug("group address is not IPv6 multicast\n");
+			return -EADDRNOTAVAIL;
+		}
+#else
+		return -EPFNOSUPPORT;
+#endif
 	}
 
 	if (data[IFLA_VXLAN_PORT_RANGE]) {
@@ -1371,11 +1586,31 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 	}
 	vxlan->vni = vni;
 
-	if (data[IFLA_VXLAN_GROUP])
-		vxlan->gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
+	if (data[IFLA_VXLAN_GROUP]) {
+		vxlan->gaddr.sin = nla_get_be32(data[IFLA_VXLAN_GROUP]);
+		vxlan->gaddr.family = AF_INET;
+	} else if (data[IFLA_VXLAN_GROUP6]) {
+#if IS_ENABLED(CONFIG_IPV6)
+		nla_memcpy(&vxlan->gaddr.sin6, data[IFLA_VXLAN_GROUP6],
+			   sizeof(struct in6_addr));
+		vxlan->gaddr.family = AF_INET6;
+#else
+		return -EPFNOSUPPORT;
+#endif
+	}
 
-	if (data[IFLA_VXLAN_LOCAL])
-		vxlan->saddr = nla_get_be32(data[IFLA_VXLAN_LOCAL]);
+	if (data[IFLA_VXLAN_LOCAL]) {
+		vxlan->saddr.sin = nla_get_be32(data[IFLA_VXLAN_LOCAL]);
+		vxlan->saddr.family = AF_INET;
+	} else if (data[IFLA_VXLAN_LOCAL6]) {
+#if IS_ENABLED(CONFIG_IPV6)
+		nla_memcpy(&vxlan->saddr.sin6, data[IFLA_VXLAN_LOCAL6],
+			   sizeof(struct in6_addr));
+		vxlan->saddr.family = AF_INET6;
+#else
+		return -EPFNOSUPPORT;
+#endif
+	}
 
 	if (data[IFLA_VXLAN_LINK] &&
 	    (vxlan->link = nla_get_u32(data[IFLA_VXLAN_LINK]))) {
@@ -1453,9 +1688,9 @@ static size_t vxlan_get_size(const struct net_device *dev)
 {
 
 	return nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_ID */
-		nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_GROUP */
+		nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_GROUP{6} */
 		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LINK */
-		nla_total_size(sizeof(__be32))+	/* IFLA_VXLAN_LOCAL */
+		nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
 		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_TTL */
 		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_TOS */
 		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_LEARNING */
@@ -1480,14 +1715,34 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	if (nla_put_u32(skb, IFLA_VXLAN_ID, vxlan->vni))
 		goto nla_put_failure;
 
-	if (vxlan->gaddr && nla_put_be32(skb, IFLA_VXLAN_GROUP, vxlan->gaddr))
-		goto nla_put_failure;
+	if (!vxlan_addr_any(&vxlan->gaddr)) {
+		if (vxlan->gaddr.family == AF_INET) {
+			if (nla_put_be32(skb, IFLA_VXLAN_GROUP, vxlan->gaddr.sin))
+				goto nla_put_failure;
+		} else {
+#if IS_ENABLED(CONFIG_IPV6)
+			if (nla_put(skb, IFLA_VXLAN_GROUP6, sizeof(struct in6_addr),
+				    &vxlan->gaddr.sin6))
+				goto nla_put_failure;
+#endif
+		}
+	}
 
 	if (vxlan->link && nla_put_u32(skb, IFLA_VXLAN_LINK, vxlan->link))
 		goto nla_put_failure;
 
-	if (vxlan->saddr && nla_put_be32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr))
-		goto nla_put_failure;
+	if (!vxlan_addr_any(&vxlan->saddr)) {
+		if (vxlan->saddr.family == AF_INET) {
+			if (nla_put_be32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr.sin))
+				goto nla_put_failure;
+		} else {
+#if IS_ENABLED(CONFIG_IPV6)
+			if (nla_put(skb, IFLA_VXLAN_LOCAL6, sizeof(struct in6_addr),
+				    &vxlan->saddr.sin6))
+				goto nla_put_failure;
+#endif
+		}
+	}
 
 	if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->ttl) ||
 	    nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->tos) ||
@@ -1526,38 +1781,82 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
 	.fill_info	= vxlan_fill_info,
 };
 
-static __net_init int vxlan_init_net(struct net *net)
+/* Create UDP socket for encapsulation receive. AF_INET6 socket
+ * could be used for both IPv4 and IPv6 communications.
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+static __net_init int create_sock(struct net *net, struct sock **sk)
+{
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct sockaddr_in6 vxlan_addr = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(vxlan_port),
+	};
+	int rc;
+
+	rc = sock_create_kern(AF_INET6, SOCK_DGRAM, IPPROTO_UDP, &vn->sock);
+	if (rc < 0) {
+		pr_debug("UDP socket create failed\n");
+		return rc;
+	}
+	/* Put in proper namespace */
+	*sk = vn->sock->sk;
+	sk_change_net(*sk, net);
+
+	rc = kernel_bind(vn->sock, (struct sockaddr *)&vxlan_addr,
+			 sizeof(struct sockaddr_in6));
+	if (rc < 0) {
+		pr_debug("bind for UDP socket %pI6:%u (%d)\n",
+			 &vxlan_addr.sin6_addr, ntohs(vxlan_addr.sin6_port), rc);
+		sk_release_kernel(*sk);
+		vn->sock = NULL;
+		return rc;
+	}
+	return 0;
+}
+#else
+static __net_init int create_sock(struct net *net, struct sock **sk)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
-	struct sock *sk;
 	struct sockaddr_in vxlan_addr = {
 		.sin_family = AF_INET,
+		.sin_port = htons(vxlan_port),
 		.sin_addr.s_addr = htonl(INADDR_ANY),
 	};
 	int rc;
-	unsigned h;
 
-	/* Create UDP socket for encapsulation receive. */
 	rc = sock_create_kern(AF_INET, SOCK_DGRAM, IPPROTO_UDP, &vn->sock);
 	if (rc < 0) {
 		pr_debug("UDP socket create failed\n");
 		return rc;
 	}
 	/* Put in proper namespace */
-	sk = vn->sock->sk;
-	sk_change_net(sk, net);
-
-	vxlan_addr.sin_port = htons(vxlan_port);
+	*sk = vn->sock->sk;
+	sk_change_net(*sk, net);
 
-	rc = kernel_bind(vn->sock, (struct sockaddr *) &vxlan_addr,
-			 sizeof(vxlan_addr));
+	rc = kernel_bind(vn->sock, (struct sockaddr *)&vxlan_addr,
+			 sizeof(struct sockaddr_in));
 	if (rc < 0) {
 		pr_debug("bind for UDP socket %pI4:%u (%d)\n",
 			 &vxlan_addr.sin_addr, ntohs(vxlan_addr.sin_port), rc);
-		sk_release_kernel(sk);
+		sk_release_kernel(*sk);
 		vn->sock = NULL;
 		return rc;
 	}
+	return 0;
+}
+#endif
+
+static __net_init int vxlan_init_net(struct net *net)
+{
+	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+	struct sock *sk;
+	int rc;
+	unsigned h;
+
+	rc = create_sock(net, &sk);
+	if (rc < 0)
+		return rc;
 
 	/* Disable multicast loopback */
 	inet_sk(sk)->mc_loop = 0;
@@ -1566,6 +1865,9 @@ static __net_init int vxlan_init_net(struct net *net)
 	udp_sk(sk)->encap_type = 1;
 	udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
 	udp_encap_enable();
+#if IS_ENABLED(CONFIG_IPV6)
+	udpv6_encap_enable();
+#endif
 
 	for (h = 0; h < VNI_HASH_SIZE; ++h)
 		INIT_HLIST_HEAD(&vn->vni_list[h]);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index c4edfe1..0eee00f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -308,6 +308,8 @@ enum {
 	IFLA_VXLAN_RSC,
 	IFLA_VXLAN_L2MISS,
 	IFLA_VXLAN_L3MISS,
+	IFLA_VXLAN_GROUP6,
+	IFLA_VXLAN_LOCAL6,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
-- 
1.7.7.6

^ permalink raw reply related

* [Patch net-next v3 4/4] ipv6: Add generic UDP Tunnel segmentation
From: Cong Wang @ 2013-04-08  2:18 UTC (permalink / raw)
  To: netdev
  Cc: Jesse Gross, Pravin B Shelar, Stephen Hemminger, David S. Miller,
	Cong Wang
In-Reply-To: <1365387536-25217-1-git-send-email-amwang@redhat.com>

From: Cong Wang <amwang@redhat.com>

Similar to commit 731362674580cb0c696cd1b1a03d8461a10cf90a
(tunneling: Add generic Tunnel segmentation)

This patch adds generic tunneling offloading support for IPv6-UDP based
tunnels.

This can be used by tunneling protocols like VXLAN.

Cc: Jesse Gross <jesse@nicira.com>
Cc: Pravin B Shelar <pshelar@nicira.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
 net/ipv6/ip6_offload.c |    4 +-
 net/ipv6/udp_offload.c |  155 +++++++++++++++++++++++++++++++++---------------
 2 files changed, 110 insertions(+), 49 deletions(-)

diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 71b766e..f031ccf 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -91,6 +91,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 	unsigned int unfrag_ip6hlen;
 	u8 *prevhdr;
 	int offset = 0;
+	bool tunnel;
 
 	if (unlikely(skb_shinfo(skb)->gso_type &
 		     ~(SKB_GSO_UDP |
@@ -105,6 +106,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
 		goto out;
 
+	tunnel = !!skb->encapsulation;
 	ipv6h = ipv6_hdr(skb);
 	__skb_pull(skb, sizeof(*ipv6h));
 	segs = ERR_PTR(-EPROTONOSUPPORT);
@@ -125,7 +127,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		ipv6h = ipv6_hdr(skb);
 		ipv6h->payload_len = htons(skb->len - skb->mac_len -
 					   sizeof(*ipv6h));
-		if (proto == IPPROTO_UDP) {
+		if (!tunnel && proto == IPPROTO_UDP) {
 			unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
 			fptr = (struct frag_hdr *)(skb_network_header(skb) +
 				unfrag_ip6hlen);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 3bb3a89..bbde7ba 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -21,26 +21,81 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
 	const struct ipv6hdr *ipv6h;
 	struct udphdr *uh;
 
-	/* UDP Tunnel offload on ipv6 is not yet supported. */
-	if (skb->encapsulation)
-		return -EINVAL;
-
 	if (!pskb_may_pull(skb, sizeof(*uh)))
 		return -EINVAL;
 
-	ipv6h = ipv6_hdr(skb);
-	uh = udp_hdr(skb);
+	if (likely(!skb->encapsulation)) {
+		ipv6h = ipv6_hdr(skb);
+		uh = udp_hdr(skb);
+
+		uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
+					     IPPROTO_UDP, 0);
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		skb->ip_summed = CHECKSUM_PARTIAL;
+	}
 
-	uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
-				     IPPROTO_UDP, 0);
-	skb->csum_start = skb_transport_header(skb) - skb->head;
-	skb->csum_offset = offsetof(struct udphdr, check);
-	skb->ip_summed = CHECKSUM_PARTIAL;
 	return 0;
 }
 
+static struct sk_buff *skb_udp6_tunnel_segment(struct sk_buff *skb,
+					       netdev_features_t features)
+{
+	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	int mac_len = skb->mac_len;
+	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
+	int outer_hlen;
+	netdev_features_t enc_features;
+
+	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
+		goto out;
+
+	skb->encapsulation = 0;
+	__skb_pull(skb, tnl_hlen);
+	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, skb_inner_network_offset(skb));
+	skb->mac_len = skb_inner_network_offset(skb);
+
+	/* segment inner packet. */
+	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
+	segs = skb_mac_gso_segment(skb, enc_features);
+	if (!segs || IS_ERR(segs))
+		goto out;
+
+	outer_hlen = skb_tnl_header_len(skb);
+	skb = segs;
+	do {
+		struct udphdr *uh;
+		int udp_offset = outer_hlen - tnl_hlen;
+
+		skb->mac_len = mac_len;
+
+		skb_push(skb, outer_hlen);
+		skb_reset_mac_header(skb);
+		skb_set_network_header(skb, mac_len);
+		skb_set_transport_header(skb, udp_offset);
+		uh = udp_hdr(skb);
+		uh->len = htons(skb->len - udp_offset);
+
+		/* csum segment if tunnel sets skb with csum. */
+		if (unlikely(uh->check)) {
+			struct ipv6hdr *iph = ipv6_hdr(skb);
+
+			uh->check = csum_ipv6_magic(&iph->saddr, &iph->daddr,
+						       skb->len - udp_offset,
+						       IPPROTO_UDP, 0);
+			if (uh->check == 0)
+				uh->check = CSUM_MANGLED_0;
+
+		}
+		skb->ip_summed = CHECKSUM_NONE;
+	} while ((skb = skb->next));
+out:
+	return segs;
+}
+
 static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
-	netdev_features_t features)
+					 netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
@@ -73,43 +128,47 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		goto out;
 	}
 
-	/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
-	 * do checksum of UDP packets sent as multiple IP fragments.
-	 */
-	offset = skb_checksum_start_offset(skb);
-	csum = skb_checksum(skb, offset, skb->len - offset, 0);
-	offset += skb->csum_offset;
-	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
-	skb->ip_summed = CHECKSUM_NONE;
-
-	/* Check if there is enough headroom to insert fragment header. */
-	if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
-	    pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
-		goto out;
+	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+		segs = skb_udp6_tunnel_segment(skb, features);
+	else {
+		/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
+		 * do checksum of UDP packets sent as multiple IP fragments.
+		 */
+		offset = skb_checksum_start_offset(skb);
+		csum = skb_checksum(skb, offset, skb->len - offset, 0);
+		offset += skb->csum_offset;
+		*(__sum16 *)(skb->data + offset) = csum_fold(csum);
+		skb->ip_summed = CHECKSUM_NONE;
+
+		/* Check if there is enough headroom to insert fragment header. */
+		if ((skb_mac_header(skb) < skb->head + frag_hdr_sz) &&
+		    pskb_expand_head(skb, frag_hdr_sz, 0, GFP_ATOMIC))
+			goto out;
 
-	/* Find the unfragmentable header and shift it left by frag_hdr_sz
-	 * bytes to insert fragment header.
-	 */
-	unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
-	nexthdr = *prevhdr;
-	*prevhdr = NEXTHDR_FRAGMENT;
-	unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
-		     unfrag_ip6hlen;
-	mac_start = skb_mac_header(skb);
-	memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
-
-	skb->mac_header -= frag_hdr_sz;
-	skb->network_header -= frag_hdr_sz;
-
-	fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
-	fptr->nexthdr = nexthdr;
-	fptr->reserved = 0;
-	ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
-
-	/* Fragment the skb. ipv6 header and the remaining fields of the
-	 * fragment header are updated in ipv6_gso_segment()
-	 */
-	segs = skb_segment(skb, features);
+		/* Find the unfragmentable header and shift it left by frag_hdr_sz
+		 * bytes to insert fragment header.
+		 */
+		unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+		nexthdr = *prevhdr;
+		*prevhdr = NEXTHDR_FRAGMENT;
+		unfrag_len = skb_network_header(skb) - skb_mac_header(skb) +
+			     unfrag_ip6hlen;
+		mac_start = skb_mac_header(skb);
+		memmove(mac_start-frag_hdr_sz, mac_start, unfrag_len);
+
+		skb->mac_header -= frag_hdr_sz;
+		skb->network_header -= frag_hdr_sz;
+
+		fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
+		fptr->nexthdr = nexthdr;
+		fptr->reserved = 0;
+		ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+
+		/* Fragment the skb. ipv6 header and the remaining fields of the
+		 * fragment header are updated in ipv6_gso_segment()
+		 */
+		segs = skb_segment(skb, features);
+	}
 
 out:
 	return segs;
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH iproute2] vxlan: add ipv6 support
From: Cong Wang @ 2013-04-08  2:18 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Cong Wang
In-Reply-To: <1365387536-25217-1-git-send-email-amwang@redhat.com>

From: Cong Wang <amwang@redhat.com>

Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Cong Wang <amwang@redhat.com>
---
 include/linux/if_link.h |    2 ++
 ip/iplink_vxlan.c       |   45 ++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 40167af..f74b8cc 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -306,6 +306,8 @@ enum {
 	IFLA_VXLAN_RSC,
 	IFLA_VXLAN_L2MISS,
 	IFLA_VXLAN_L3MISS,
+	IFLA_VXLAN_GROUP6,
+	IFLA_VXLAN_LOCAL6,
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
diff --git a/ip/iplink_vxlan.c b/ip/iplink_vxlan.c
index 1025326..c10ec0f 100644
--- a/ip/iplink_vxlan.c
+++ b/ip/iplink_vxlan.c
@@ -42,6 +42,8 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv,
 	int vni_set = 0;
 	__u32 saddr = 0;
 	__u32 gaddr = 0;
+	struct in6_addr saddr6 = IN6ADDR_ANY_INIT;
+	struct in6_addr gaddr6 = IN6ADDR_ANY_INIT;
 	unsigned link = 0;
 	__u8 tos = 0;
 	__u8 ttl = 0;
@@ -65,15 +67,26 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv,
 			vni_set = 1;
 		} else if (!matches(*argv, "group")) {
 			NEXT_ARG();
-			gaddr = get_addr32(*argv);
-
-			if (!IN_MULTICAST(ntohl(gaddr)))
-				invarg("invald group address", *argv);
+			if (!inet_pton(AF_INET, *argv, &gaddr)) {
+				if (!inet_pton(AF_INET6, *argv, &gaddr6)) {
+					fprintf(stderr, "Invalid address \"%s\"\n", *argv);
+					return -1;
+				} else if (!IN6_IS_ADDR_MULTICAST(&gaddr6))
+					invarg("invald group address", *argv);
+			} else if (!IN_MULTICAST(ntohl(gaddr)))
+					invarg("invald group address", *argv);
 		} else if (!matches(*argv, "local")) {
 			NEXT_ARG();
-			if (strcmp(*argv, "any"))
-				saddr = get_addr32(*argv);
-			if (IN_MULTICAST(ntohl(saddr)))
+			if (strcmp(*argv, "any")) {
+				if (!inet_pton(AF_INET, *argv, &saddr)) {
+					if (!inet_pton(AF_INET6, *argv, &saddr6)) {
+						fprintf(stderr, "Invalid address \"%s\"\n", *argv);
+						return -1;
+					}
+				}
+			}
+
+			if (IN_MULTICAST(ntohl(saddr)) || IN6_IS_ADDR_MULTICAST(&saddr6))
 				invarg("invalid local address", *argv);
 		} else if (!matches(*argv, "dev")) {
 			NEXT_ARG();
@@ -163,8 +176,14 @@ static int vxlan_parse_opt(struct link_util *lu, int argc, char **argv,
 	addattr32(n, 1024, IFLA_VXLAN_ID, vni);
 	if (gaddr)
 		addattr_l(n, 1024, IFLA_VXLAN_GROUP, &gaddr, 4);
+	else if (memcmp(&gaddr6, &in6addr_any, sizeof(gaddr6)) != 0)
+		addattr_l(n, 1024, IFLA_VXLAN_GROUP6, &gaddr6, sizeof(struct in6_addr));
+
 	if (saddr)
 		addattr_l(n, 1024, IFLA_VXLAN_LOCAL, &saddr, 4);
+	else if (memcmp(&saddr6, &in6addr_any, sizeof(saddr6)) != 0)
+		addattr_l(n, 1024, IFLA_VXLAN_LOCAL6, &saddr6, sizeof(struct in6_addr));
+
 	if (link)
 		addattr32(n, 1024, IFLA_VXLAN_LINK, link);
 	addattr8(n, 1024, IFLA_VXLAN_TTL, ttl);
@@ -211,6 +230,12 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
 		if (addr)
 			fprintf(f, "group %s ",
 				format_host(AF_INET, 4, &addr, s1, sizeof(s1)));
+	} else if (tb[IFLA_VXLAN_GROUP6]) {
+		struct in6_addr addr;
+		memcpy(&addr, RTA_DATA(tb[IFLA_VXLAN_GROUP6]), sizeof(struct in6_addr));
+		if (memcmp(&addr, &in6addr_any, sizeof(addr)) != 0)
+			fprintf(f, "group %s ",
+				format_host(AF_INET6, sizeof(struct in6_addr), &addr, s1, sizeof(s1)));
 	}
 
 	if (tb[IFLA_VXLAN_LOCAL]) {
@@ -218,6 +243,12 @@ static void vxlan_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
 		if (addr)
 			fprintf(f, "local %s ",
 				format_host(AF_INET, 4, &addr, s1, sizeof(s1)));
+	} else if (tb[IFLA_VXLAN_LOCAL6]) {
+		struct in6_addr addr;
+		memcpy(&addr, RTA_DATA(tb[IFLA_VXLAN_LOCAL6]), sizeof(struct in6_addr));
+		if (memcmp(&addr, &in6addr_any, sizeof(addr)) != 0)
+			fprintf(f, "local %s ",
+				format_host(AF_INET6, sizeof(struct in6_addr), &addr, s1, sizeof(s1)));
 	}
 
 	if (tb[IFLA_VXLAN_LINK] &&
-- 
1.7.7.6

^ permalink raw reply related

* Re: [PATCH 2/2] sh_eth: add R-Car support for real
From: Simon Horman @ 2013-04-08  2:39 UTC (permalink / raw)
  To: Sergei Shtylyov; +Cc: netdev, nobuhiro.iwamatsu.yj, linux-sh, phil.edworthy
In-Reply-To: <201303290051.32106.sergei.shtylyov@cogentembedded.com>

On Fri, Mar 29, 2013 at 12:51:31AM +0300, Sergei Shtylyov wrote:
> Commit d0418bb7123f44b23d69ac349eec7daf9103472f (net: sh_eth: Add eth support
> for R8A7779 device) was a failed attempt to add support for one of the members of
> the R-Car SoC family.  That's for three reasons: it treated R8A7779 the  same
> as SH7724 except including quite dirty hack adding ECMR_ELB  bit  to the mask
> in sh_eth_set_rate() while not removing ECMR_RTM bit (despite it's reserved in
> R-Car Ether), and it didn't add a new register offset array despite the closest
> SH_ETH_REG_FAST_SH4 mapping differs by 0x200 to the offsets all the R-Car Ether
> registers have, and also some of the registers in this old mapping don't exist
> on R-Car Ether (due to this, SH7724's 'sh_eth_my_cpu_data' structure is not
> adequate for R-Car too).  Fix all these shortcomings, restoring the SH7724
> related section to its pristine state...
> 
> Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

Hi Sergei,

thanks for this and sorry for not noticing it earlier.
I will try exercising it on the r8a7790 lager.

IIRC you previously mentioned that you were doing work on unravelling
the #define mess in sh_eth. I am wondering if you have made any progress
in that area. In particular, other than sh_eth it looks like
the r8a7740 and r8a7790 could use a common .config. So it would
be nice if sh_eth could support at least those two in the same build.

> ---
> The patch is against the David Miller's 'net-next.git' repo.
> 
> Support for the other members of R-Car family such as R8A7778 and R8A7790 should
> probably be added when they hit mainline (support for the former is already in
> the 'next' branch of Simon Horman's 'renesas.git' repo).
> 
>  drivers/net/ethernet/renesas/sh_eth.c |  107 +++++++++++++++++++++++++++++++---
>  include/linux/sh_eth.h                |    1 
>  2 files changed, 100 insertions(+), 8 deletions(-)
> 
> Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
> ===================================================================
> --- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
> +++ net-next/drivers/net/ethernet/renesas/sh_eth.c
> @@ -2,7 +2,8 @@
>   *  SuperH Ethernet device driver
>   *
>   *  Copyright (C) 2006-2012 Nobuhiro Iwamatsu
> - *  Copyright (C) 2008-2012 Renesas Solutions Corp.
> + *  Copyright (C) 2008-2013 Renesas Solutions Corp.
> + *  Copyright (C) 2013 Cogent Embedded, Inc.
>   *
>   *  This program is free software; you can redistribute it and/or modify it
>   *  under the terms and conditions of the GNU General Public License,
> @@ -147,6 +148,51 @@ static const u16 sh_eth_offset_gigabit[S
>  	[FWALCR1]	= 0x00b4,
>  };
>  
> +static const u16 sh_eth_offset_fast_rcar[SH_ETH_MAX_REGISTER_OFFSET] = {
> +	[ECMR]		= 0x0300,
> +	[RFLR]		= 0x0308,
> +	[ECSR]		= 0x0310,
> +	[ECSIPR]	= 0x0318,
> +	[PIR]		= 0x0320,
> +	[PSR]		= 0x0328,
> +	[RDMLR]		= 0x0340,
> +	[IPGR]		= 0x0350,
> +	[APR]		= 0x0354,
> +	[MPR]		= 0x0358,
> +	[RFCF]		= 0x0360,
> +	[TPAUSER]	= 0x0364,
> +	[TPAUSECR]	= 0x0368,
> +	[MAHR]		= 0x03c0,
> +	[MALR]		= 0x03c8,
> +	[TROCR]		= 0x03d0,
> +	[CDCR]		= 0x03d4,
> +	[LCCR]		= 0x03d8,
> +	[CNDCR]		= 0x03dc,
> +	[CEFCR]		= 0x03e4,
> +	[FRECR]		= 0x03e8,
> +	[TSFRCR]	= 0x03ec,
> +	[TLFRCR]	= 0x03f0,
> +	[RFCR]		= 0x03f4,
> +	[MAFCR]		= 0x03f8,
> +
> +	[EDMR]		= 0x0200,
> +	[EDTRR]		= 0x0208,
> +	[EDRRR]		= 0x0210,
> +	[TDLAR]		= 0x0218,
> +	[RDLAR]		= 0x0220,
> +	[EESR]		= 0x0228,
> +	[EESIPR]	= 0x0230,
> +	[TRSCER]	= 0x0238,
> +	[RMFCR]		= 0x0240,
> +	[TFTR]		= 0x0248,
> +	[FDR]		= 0x0250,
> +	[RMCR]		= 0x0258,
> +	[TFUCR]		= 0x0264,
> +	[RFOCR]		= 0x0268,
> +	[FCFTR]		= 0x0270,
> +	[TRIMD]		= 0x027c,
> +};
> +
>  static const u16 sh_eth_offset_fast_sh4[SH_ETH_MAX_REGISTER_OFFSET] = {
>  	[ECMR]		= 0x0100,
>  	[RFLR]		= 0x0108,
> @@ -296,7 +342,7 @@ static void sh_eth_select_mii(struct net
>  #endif
>  
>  /* There is CPU dependent code */
> -#if defined(CONFIG_CPU_SUBTYPE_SH7724) || defined(CONFIG_ARCH_R8A7779)
> +#if defined(CONFIG_ARCH_R8A7779)
>  #define SH_ETH_RESET_DEFAULT	1
>  static void sh_eth_set_duplex(struct net_device *ndev)
>  {
> @@ -311,18 +357,60 @@ static void sh_eth_set_duplex(struct net
>  static void sh_eth_set_rate(struct net_device *ndev)
>  {
>  	struct sh_eth_private *mdp = netdev_priv(ndev);
> -	unsigned int bits = ECMR_RTM;
>  
> -#if defined(CONFIG_ARCH_R8A7779)
> -	bits |= ECMR_ELB;
> -#endif
> +	switch (mdp->speed) {
> +	case 10: /* 10BASE */
> +		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_ELB, ECMR);
> +		break;
> +	case 100:/* 100BASE */
> +		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_ELB, ECMR);
> +		break;
> +	default:
> +		break;
> +	}
> +}
> +
> +/* R8A7779 */
> +static struct sh_eth_cpu_data sh_eth_my_cpu_data = {
> +	.set_duplex	= sh_eth_set_duplex,
> +	.set_rate	= sh_eth_set_rate,
> +
> +	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
> +	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
> +	.eesipr_value	= 0x01ff009f,
> +
> +	.tx_check	= EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO,
> +	.eesr_err_check	= EESR_TWB | EESR_TABT | EESR_RABT | EESR_RDE |
> +			  EESR_RFRMER | EESR_TFE | EESR_TDE | EESR_ECI,
> +	.tx_error_check	= EESR_TWB | EESR_TABT | EESR_TDE | EESR_TFE,
> +
> +	.apr		= 1,
> +	.mpr		= 1,
> +	.tpauser	= 1,
> +	.hw_swap	= 1,
> +};
> +#elif defined(CONFIG_CPU_SUBTYPE_SH7724)
> +#define SH_ETH_RESET_DEFAULT	1
> +static void sh_eth_set_duplex(struct net_device *ndev)
> +{
> +	struct sh_eth_private *mdp = netdev_priv(ndev);
> +
> +	if (mdp->duplex) /* Full */
> +		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_DM, ECMR);
> +	else		/* Half */
> +		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_DM, ECMR);
> +}
> +
> +static void sh_eth_set_rate(struct net_device *ndev)
> +{
> +	struct sh_eth_private *mdp = netdev_priv(ndev);
>  
>  	switch (mdp->speed) {
>  	case 10: /* 10BASE */
> -		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~bits, ECMR);
> +		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_RTM, ECMR);
>  		break;
>  	case 100:/* 100BASE */
> -		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | bits, ECMR);
> +		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_RTM, ECMR);
>  		break;
>  	default:
>  		break;
> @@ -2521,6 +2609,9 @@ static const u16 *sh_eth_get_register_of
>  	case SH_ETH_REG_GIGABIT:
>  		reg_offset = sh_eth_offset_gigabit;
>  		break;
> +	case SH_ETH_REG_FAST_RCAR:
> +		reg_offset = sh_eth_offset_fast_rcar;
> +		break;
>  	case SH_ETH_REG_FAST_SH4:
>  		reg_offset = sh_eth_offset_fast_sh4;
>  		break;
> Index: net-next/include/linux/sh_eth.h
> ===================================================================
> --- net-next.orig/include/linux/sh_eth.h
> +++ net-next/include/linux/sh_eth.h
> @@ -6,6 +6,7 @@
>  enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN};
>  enum {
>  	SH_ETH_REG_GIGABIT,
> +	SH_ETH_REG_FAST_RCAR,
>  	SH_ETH_REG_FAST_SH4,
>  	SH_ETH_REG_FAST_SH3_SH2
>  };
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sh" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* RE: [net-next] stmmac: modified pcs mode support for SGMII
From: Byungho An @ 2013-04-08  3:56 UTC (permalink / raw)
  To: 'David Miller'; +Cc: netdev, peppe.cavallaro, kgene.kim, cpgs
In-Reply-To: <20130407.170846.2105935127378498264.davem@davemloft.net>

I fixed and resent this patch.

Thank you.

> -----Original Message-----
> From: David Miller [mailto:davem@davemloft.net]
> Sent: Monday, April 08, 2013 6:09 AM
> To: bh74.an@samsung.com
> Cc: netdev@vger.kernel.org; peppe.cavallaro@st.com; kgene.kim@samsung.com; cpgs@samsung.com
> Subject: Re: [net-next] stmmac: modified pcs mode support for SGMII
> 
> From: Byungho An <bh74.an@samsung.com>
> Date: Thu, 04 Apr 2013 14:57:01 +0900
> 
> > This patch modifies the pcs mode support for SGMII. Even though
> > SGMII does auto-negotiation with phy, it needs stmmac_init_phy and
> > stmmac_mdio_register function for initializing phy.
> >
> > Signed-off-by: Byungho An <bh74.an@samsung.com>
> 
> Your email client corrupted this patch, it turned all TAB characters
> into spaces.  This makes your submission unusable.
> 
> Please fix this, email the patch to yourself, and only resubmit the
> patch here if you are able to successfully apply the patch you receive
> in a test email.

^ permalink raw reply

* [PATCH net-next RESEND] stmmac: modified pcs mode support for SGMII
From: Byungho An @ 2013-04-08  3:56 UTC (permalink / raw)
  To: netdev
  Cc: 'Giuseppe CAVALLARO', '김국진',
	cpgs, davem


This patch modifies the pcs mode support for SGMII. Even though
SGMII does auto-negotiation with the phy, it still needs the stmmac_init_phy
and stmmac_mdio_register functions to initialize the phy.

Signed-off-by: Byungho An <bh74.an@samsung.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 6b26d31..3ac9bd7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1504,7 +1504,8 @@ static int stmmac_open(struct net_device *dev)
 
 	stmmac_check_ether_addr(priv);
 
-	if (!priv->pcs) {
+	if (priv->pcs != STMMAC_PCS_RGMII && priv->pcs != STMMAC_PCS_TBI &&
+	    priv->pcs != STMMAC_PCS_RTBI) {
 		ret = stmmac_init_phy(dev);
 		if (ret) {
 			pr_err("%s: Cannot attach to PHY (error: %d)\n",
@@ -1607,7 +1608,8 @@ static int stmmac_open(struct net_device *dev)
 	/* Using PCS we cannot dial with the phy registers at this stage
 	 * so we do not support extra feature like EEE.
 	 */
-	if (!priv->pcs)
+	if (priv->pcs != STMMAC_PCS_RGMII && priv->pcs != STMMAC_PCS_TBI &&
+	    priv->pcs != STMMAC_PCS_RTBI)
 		priv->eee_enabled = stmmac_eee_init(priv);
 
 	stmmac_init_tx_coalesce(priv);
@@ -2637,7 +2639,8 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device,
 
 	stmmac_check_pcs_mode(priv);
 
-	if (!priv->pcs) {
+	if (priv->pcs != STMMAC_PCS_RGMII && priv->pcs != STMMAC_PCS_TBI &&
+	    priv->pcs != STMMAC_PCS_RTBI) {
 		/* MDIO bus Registration */
 		ret = stmmac_mdio_register(ndev);
 		if (ret < 0) {
@@ -2677,7 +2680,8 @@ int stmmac_dvr_remove(struct net_device *ndev)
 	priv->hw->dma->stop_tx(priv->ioaddr);
 
 	stmmac_set_mac(priv->ioaddr, false);
-	if (!priv->pcs)
+	if (priv->pcs != STMMAC_PCS_RGMII && priv->pcs != STMMAC_PCS_TBI &&
+	    priv->pcs != STMMAC_PCS_RTBI)
 		stmmac_mdio_unregister(ndev);
 	netif_carrier_off(ndev);
 	unregister_netdev(ndev);
-- 
1.7.10.4

^ permalink raw reply related

* Re: AMD Vi error and lost networking with r8169
From: David R @ 2013-04-08  6:14 UTC (permalink / raw)
  To: Francois Romieu; +Cc: Linux Kernel Mailing List, netdev
In-Reply-To: <20130407215303.GA28778@electric-eye.fr.zoreil.com>

Sure. Will apply this evening. It may take several days before I can  
report back due to the intermittent nature of the thing.

Thanks
David


Quoting Francois Romieu <romieu@fr.zoreil.com>:

> David R <david@unsolicited.net> :
>> I've been seeing some problems with my new-ish AMD motherboard/processor
>> combo and networking (r8169). I see the following page fault :-
>>
>> Apr  7 12:25:14 david kernel: [156421.436545] AMD-Vi: Event logged
>> [IO_PAGE_FAULT device=02:00.0 domain=0x0015 address=0x0000000000003000
>> flags=0x0050]
>
> Can you give the hack below a try ?
>
> diff --git a/drivers/net/ethernet/realtek/r8169.c  
> b/drivers/net/ethernet/realtek/r8169.c
> index 28fb50a..ed8625d 100644
> --- a/drivers/net/ethernet/realtek/r8169.c
> +++ b/drivers/net/ethernet/realtek/r8169.c
> @@ -4125,6 +4125,8 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
>  	case RTL_GIGA_MAC_VER_23:
>  	case RTL_GIGA_MAC_VER_24:
>  	case RTL_GIGA_MAC_VER_34:
> +	case RTL_GIGA_MAC_VER_35:
> +	case RTL_GIGA_MAC_VER_36:
>  		RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
>  		break;
>  	default:
>

^ permalink raw reply

* [PATCH v5 0/4] Add packet recirculation
From: Simon Horman @ 2013-04-08  6:43 UTC (permalink / raw)
  To: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: Isaku Yamahata, Ravi K

Recirculation is a technique to allow a frame to re-enter
frame processing. This is intended to be used after actions
have been applied to the frame which modify the frame in
some way that makes it possible for richer processing to occur.

An example, and indeed the targeted use case, is MPLS. If an MPLS frame has an
mpls_pop action applied with the IPv4 ethernet type then it becomes
possible to decode the IPv4 portion of the frame. This may be used to
construct a facet that modifies the IPv4 portion of the frame. This is not
possible prior to the mpls_pop action as the contents of the frame after
the MPLS stack are not known to be IPv4.

Status:

I have dropped the RFC prefix from this series as I now believe
it is feature-complete. Any and all review is greatly appreciated.

Design:

* New recirculation action.

  ovs-vswitchd adds a recirculation action to the end of a list of
  datapath actions for a flow when the actions are truncated because
  insufficient flow match information is available to add the next
  OpenFlow action.  The recirculation action is preceded by an action
  to set the skb_mark to an id which can be used to scope a facet lookup
  of a recirculated packet.

  e.g.  pop_mpls(0x0800),dec_ttl becomes pop_mpls(0x800),set(skb_mark(id)),recirculate

* Datapath behaviour

  When the datapath encounters a recirculate action it:
  + Recalculates the flow key based on the packet
    which will typically have been modified by previous actions
  + As the recirculate action is preceded by a set(skb_mark(id)) action,
    the new match key will now include skb_mark=id.
  + Performs a lookup using the new match key
  + Processes the packet if a facet matches the key or;
  + Makes an upcall if necessary

* No facet behaviour

  + Loop:
    1) translate actions
    2) If there is a recirculate action, execute packet
       and go back to 1) for remaining actions.

Base/Pre-requisites:

This patch depends on "[PATCH v2.24] datapath: Add basic MPLS support to kernel".
There are currently no other patches in the recirculation series.

Availability:

For reference this patch is available in git at:
git://github.com/horms/openvswitch.git devel/mpls-recirculate.v5

Change Log:

v5
* Correct declaration of facet_find_by_id to match definition:
  ovs_be32 -> uint32_t.
* Enhancements to recirculation id code:
  - Allow efficient lookup of facets by their recirculation id
  - Add RECIRCULATION_ID_DUMMY which may be used in cases
    where no facet is used. It is an arbitrary valid id.
  - Also add recirculated element to action_xlate_ctx()
    to use to detect if a recirculation action was added during
    translation. The previous scheme of checking if recirculation_id
    was not RECIRCULATION_ID_NONE is broken for cases where
    the context is initialised with a recirculation_id other than
    RECIRCULATION_ID_NONE. E.g. when RECIRCULATION_ID_DUMMY is used.
  - Avoid id collision

rfc4:
* Allow recirculation without facets in ovs-vswitchd
  - Handle flow miss without facet
  - Packet out
* Minor enhancement to recirculation id management: Add RECIRCULATE_ID_NONE
  to use instead of using 0 directly.
* Correct calculation of facet->recirculation_ofpacts and
  facet->recirculation_ofpacts_len in subfacet_make_actions()
  in the case of more than one level of recirculation.

rfc3
* Use IS_ERR_OR_NULL()
* Handle facet consistency checking by constructing a chain of facets
  from the given facet, to its recirculation parent and then its parent
  until the topmost facet.  If there is no recirculation  the chain will
  be of length one. If there is one recirculation action then the chain
  will be of length two. And so on.

  The topmost facet in the chain is used to look up the rule to be
  verified. The chain is then walked from top to bottom, translating
  actions up to the end or the first recirculation action that is
  encountered, whichever comes first. As the code walks down the chain
  it updates the start of the actions to be executed so that it is
  just after the end of the actions executed in the previous facet in
  the chain. This is similar to the way that facets are created when a
  recirculation action is encountered.

rfc2
* As suggested by Jesse Gross
  - Update for changes to ovs_dp_process_received_packet()
    to no longer check if OVS_CB(skb)->flow is pre-initialised.
  - Do not add spurious printk debugging to ovs_execute_actions()
  - Do not add spurious debugging messages to commit_set_nw_action()
  - Correct typo in comment above commit_odp_actions().
  - Do not execute recirculation in ovs-vswitchd, rather allow
    the datapath to make an upcall when a recirculation action
    is encountered on execute.
    + This implicitly breaks support for recirculation without facets,
      so for now force all misses of MPLS frames to be handled with
      a facet; and treat handling of recirculation for packet_out as
      a todo item.
  - Use skb_mark for recirculation_id in match. This avoids
    both expanding the match and including a recirculation_id parameter
    with the recirculation action: set_skb_mark should be used before
    the recirculation action.
  - Tidy up ownership of skb in ovs_execute_actions

rfc1
* Initial post

Patch List and Diffstat:

Simon Horman (4):
  Add packet recirculation
  Move execute_set_action to lib/odp-util.c
  Allow recirculation without facets
  Avoid recirculation id collision

 datapath/actions.c          |    9 +-
 datapath/datapath.c         |   98 +++++---
 datapath/datapath.h         |    2 +-
 include/linux/openvswitch.h |    4 +
 lib/dpif-netdev.c           |  153 +++++--------
 lib/flow.h                  |    3 +
 lib/odp-util.c              |   91 +++++++-
 lib/odp-util.h              |    4 +
 ofproto/ofproto-dpif.c      |  521 +++++++++++++++++++++++++++++++++++++------
 9 files changed, 681 insertions(+), 204 deletions(-)

-- 
1.7.10.4

^ permalink raw reply

* [PATCH 1/4] Add packet recirculation
From: Simon Horman @ 2013-04-08  6:43 UTC (permalink / raw)
  To: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: Isaku Yamahata, Ravi K
In-Reply-To: <1365403431-18102-1-git-send-email-horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>

Recirculation is a technique to allow a frame to re-enter
frame processing. This is intended to be used after actions
have been applied to the frame which modify the frame in
some way that makes it possible for richer processing to occur.

An example, and indeed the targeted use case, is MPLS. If an MPLS frame has an
mpls_pop action applied with the IPv4 ethernet type then it becomes
possible to decode the IPv4 portion of the frame. This may be used to
construct a facet that modifies the IPv4 portion of the frame. This is not
possible prior to the mpls_pop action as the contents of the frame after
the MPLS stack are not known to be IPv4.

Design:
* New recirculation action.

  ovs-vswitchd adds a recirculation action to the end of a list of
  datapath actions for a flow when the actions are truncated because
  insufficient flow match information is available to add the next
  OpenFlow action.  The recirculation action is preceded by an action
  to set the skb_mark to an id which can be used to scope a facet lookup
  of a recirculated packet.

  e.g.  pop_mpls(0x0800),dec_ttl becomes pop_mpls(0x800),set(skb_mark(id)),recirculate

* Datapath behaviour

  When the datapath encounters a recirculate action it:
  + Recalculates the flow key based on the packet
    which will typically have been modified by previous actions
  + As the recirculate action is preceded by a set(skb_mark(id)) action,
    the new match key will now include skb_mark=id.
  + Performs a lookup using the new match key
  + Processes the packet if a facet matches the key or;
  + Makes an upcall if necessary

* No facet behaviour

  + Loop:
    1) translate actions
    2) If there is a recirculate action, execute packet
       and go back to 1) for remaining actions.

Limitations of this patch:

* Facets are required; recirculation without facets is not supported.
  A proposed implementation is provided in a subsequent patch.
* Recirculation ids may conflict.
  A proposed resolution is provided in a subsequent patch.

Signed-off-by: Simon Horman <horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>

---

This patch depends on "[PATCH v2.24] datapath: Add basic MPLS support to kernel".

Change Log:

v5
* Correct declaration of facet_find_by_id to match definition:
  ovs_be32 -> uint32_t.
* Enhancements to recirculation id code:
  - Allow efficient lookup of facets by their recirculation id
  - Add RECIRCULATION_ID_DUMMY which may be used in cases
    where no facet is used. It is an arbitrary valid id.
  - Also add recirculated element to action_xlate_ctx()
    to use to detect if a recirculation action was added during
    translation. The previous scheme of checking if recirculation_id
    was not RECIRCULATION_ID_NONE is broken for cases where
    the context is initialised with a recirculation_id other than
    RECIRCULATION_ID_NONE. E.g. when RECIRCULATION_ID_DUMMY is used.

rfc4
* Minor enhancement to recirculation id management: Add RECIRCULATE_ID_NONE
  to use instead of using 0 directly.
* Correct calculation of facet->recirculation_ofpacts and
  facet->recirculation_ofpacts_len in subfacet_make_actions()
  in the case of more than one level of recirculation.

rfc3
* Use IS_ERR_OR_NULL()
* Handle facet consistency checking by constructing a chain of facets
  from the given facet, to its recirculation parent and then its parent
  until the topmost facet.  If there is no recirculation  the chain will
  be of length one. If there is one recirculation action then the chain
  will be of length two. And so on.

  The topmost facet in the chain is used to look up the rule to be
  verified. The chain is then walked from top to bottom, translating
  actions up to the end or the first recirculation action that is
  encountered, whichever comes first. As the code walks down the chain
  it updates the start of the actions to be executed so that it is
  just after the end of the actions executed in the previous facet in
  the chain. This is similar to the way that facets are created when a
  recirculation action is encountered.

rfc2
* As suggested by Jesse Gross
  - Update for changes to ovs_dp_process_received_packet()
    to no longer check if OVS_CB(skb)->flow is pre-initialised.
  - Do not add spurious printk debugging to ovs_execute_actions()
  - Do not add spurious debugging messages to commit_set_nw_action()
  - Correct typo in comment above commit_odp_actions().
  - Do not execute recirculation in ovs-vswitchd, rather allow
    the datapath to make an upcall when a recirculation action
    is encountered on execute.
    + This implicitly breaks support for recirculation without facets,
      so for now force all misses of MPLS frames to be handled with
      a facet; and treat handling of recirculation for packet_out as
      a todo item.
  - Use skb_mark for recirculation_id in match. This avoids
    both expanding the match and including a recirculation_id parameter
    with the recirculation action: set_skb_mark should be used before
    the recirculation action.
  - Tidy up ownership of skb in ovs_execute_actions

rfc1
* Initial post
---
 datapath/actions.c          |    9 +-
 datapath/datapath.c         |   98 +++++++----
 datapath/datapath.h         |    2 +-
 include/linux/openvswitch.h |    4 +
 lib/dpif-netdev.c           |   89 +++++++---
 lib/flow.h                  |    3 +
 lib/odp-util.c              |   15 +-
 lib/odp-util.h              |    1 +
 ofproto/ofproto-dpif.c      |  382 ++++++++++++++++++++++++++++++++++++-------
 9 files changed, 476 insertions(+), 127 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index e9634fe..7b0f022 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -617,6 +617,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 		case OVS_ACTION_ATTR_SAMPLE:
 			err = sample(dp, skb, a);
 			break;
+
+		case OVS_ACTION_ATTR_RECIRCULATE:
+			return 1;
 		}
 
 		if (unlikely(err)) {
@@ -657,7 +660,7 @@ static int loop_suppress(struct datapath *dp, struct sw_flow_actions *actions)
 }
 
 /* Execute a list of actions against 'skb'. */
-int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
+struct sk_buff *ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
 {
 	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
 	struct loop_counter *loop;
@@ -676,6 +679,8 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
 	OVS_CB(skb)->tun_key = NULL;
 	error = do_execute_actions(dp, skb, acts->actions,
 					 acts->actions_len, false);
+	if (likely(error <= 0))
+		skb = NULL;
 
 	/* Check whether sub-actions looped too much. */
 	if (unlikely(loop->looping))
@@ -686,5 +691,5 @@ out_loop:
 	if (!--loop->count)
 		loop->looping = false;
 
-	return error;
+	return (error < 0) ? ERR_PTR(error) : skb;
 }
diff --git a/datapath/datapath.c b/datapath/datapath.c
index e8be795..ab39dd7 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -202,52 +202,63 @@ void ovs_dp_detach_port(struct vport *p)
 	ovs_vport_del(p);
 }
 
+#define MAX_RECIRCULATION_DEPTH	4	/* Completely arbitrary */
+
 /* Must be called with rcu_read_lock. */
 void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
 {
 	struct datapath *dp = p->dp;
-	struct sw_flow *flow;
 	struct dp_stats_percpu *stats;
-	struct sw_flow_key key;
-	u64 *stats_counter;
-	int error;
-	int key_len;
+	int limit = MAX_RECIRCULATION_DEPTH;
 
 	stats = this_cpu_ptr(dp->stats_percpu);
 
-	/* Extract flow from 'skb' into 'key'. */
-	error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
-	if (unlikely(error)) {
-		kfree_skb(skb);
-		return;
-	}
+	while (1) {
+		u64 *stats_counter;
+		struct sw_flow *flow;
+		struct sw_flow_key key;
+		int error, key_len;
 
-	/* Look up flow. */
-	flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
-	if (unlikely(!flow)) {
-		struct dp_upcall_info upcall;
-
-		upcall.cmd = OVS_PACKET_CMD_MISS;
-		upcall.key = &key;
-		upcall.userdata = NULL;
-		upcall.portid = p->upcall_portid;
-		ovs_dp_upcall(dp, skb, &upcall);
-		consume_skb(skb);
-		stats_counter = &stats->n_missed;
-		goto out;
-	}
+		/* Extract flow from 'skb' into 'key'. */
+		error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
+		if (unlikely(error)) {
+			kfree_skb(skb);
+			return;
+		}
 
-	OVS_CB(skb)->flow = flow;
+		/* Look up flow. */
+		flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table),
+					   &key, key_len);
+		if (unlikely(!flow)) {
+			struct dp_upcall_info upcall;
+
+			upcall.cmd = OVS_PACKET_CMD_MISS;
+			upcall.key = &key;
+			upcall.userdata = NULL;
+			upcall.portid = p->upcall_portid;
+			ovs_dp_upcall(dp, skb, &upcall);
+			consume_skb(skb);
+			stats_counter = &stats->n_missed;
+			skb = NULL;
+		} else {
+			OVS_CB(skb)->flow = flow;
+			stats_counter = &stats->n_hit;
+			ovs_flow_used(flow, skb);
+			skb = ovs_execute_actions(dp, skb);
+		}
 
-	stats_counter = &stats->n_hit;
-	ovs_flow_used(OVS_CB(skb)->flow, skb);
-	ovs_execute_actions(dp, skb);
+		/* Update datapath statistics. */
+		u64_stats_update_begin(&stats->sync);
+		(*stats_counter)++;
+		u64_stats_update_end(&stats->sync);
 
-out:
-	/* Update datapath statistics. */
-	u64_stats_update_begin(&stats->sync);
-	(*stats_counter)++;
-	u64_stats_update_end(&stats->sync);
+		if (IS_ERR_OR_NULL(skb)) {
+			break;
+		} else if (unlikely(!limit--)) {
+			kfree_skb(skb);
+			return;
+		}
+	}
 }
 
 static struct genl_family dp_packet_genl_family = {
@@ -818,6 +829,7 @@ static int validate_and_copy_actions__(const struct nlattr *attr,
 			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
 			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
 			[OVS_ACTION_ATTR_POP_VLAN] = 0,
+			[OVS_ACTION_ATTR_RECIRCULATE] = 0,
 			[OVS_ACTION_ATTR_SET] = (u32)-1,
 			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
 		};
@@ -901,6 +913,9 @@ static int validate_and_copy_actions__(const struct nlattr *attr,
 			skip_copy = true;
 			break;
 
+		case OVS_ACTION_ATTR_RECIRCULATE:
+			break;
+
 		default:
 			return -EINVAL;
 		}
@@ -1005,12 +1020,23 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 		goto err_unlock;
 
 	local_bh_disable();
-	err = ovs_execute_actions(dp, packet);
+	packet = ovs_execute_actions(dp, packet);
+	if (!IS_ERR_OR_NULL(packet)) {
+		struct vport *vport;
+		vport = ovs_lookup_vport(dp, flow->key.phy.in_port);
+		if (!vport) {
+			err = -ENODEV;
+			goto err_unlock;
+		}
+		/* Recirculate */
+		ovs_dp_process_received_packet(vport, packet);
+		packet = NULL;
+	}
 	local_bh_enable();
 	rcu_read_unlock();
 
 	ovs_flow_free(flow);
-	return err;
+	return PTR_ERR(packet);
 
 err_unlock:
 	rcu_read_unlock();
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 7665742..8da5e8a 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -188,7 +188,7 @@ const char *ovs_dp_name(const struct datapath *dp);
 struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 portid, u32 seq,
 					 u8 cmd);
 
-int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+struct sk_buff *ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
 
 unsigned char *skb_cb_mpls_stack(const struct sk_buff *skb);
 #endif /* datapath.h */
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index e890fd8..0fff7cc 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -516,6 +516,9 @@ struct ovs_action_push_vlan {
  * indicate the new packet contents This could potentially still be
  * %ETH_P_MPLS_* if the resulting MPLS label stack is not empty.  If there
  * is no MPLS label stack, as determined by ethertype, no action is taken.
+ * @OVS_ACTION_ATTR_RECIRCULATE: Restart processing of packet.
+ * The packet must have been modified by a previous action in such a way
+ * that it does not match its original flow again.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -532,6 +535,7 @@ enum ovs_action_attr {
 	OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
 	OVS_ACTION_ATTR_PUSH_MPLS,    /* struct ovs_action_push_mpls. */
 	OVS_ACTION_ATTR_POP_MPLS,     /* __be16 ethertype. */
+	OVS_ACTION_ATTR_RECIRCULATE,  /* No argument */
 	__OVS_ACTION_ATTR_MAX
 };
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e4a2f75..31255f6 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -152,10 +152,14 @@ static int dpif_netdev_open(const struct dpif_class *, const char *name,
 static int dp_netdev_output_userspace(struct dp_netdev *, const struct ofpbuf *,
                                     int queue_no, const struct flow *,
                                     const struct nlattr *userdata);
-static void dp_netdev_execute_actions(struct dp_netdev *,
+static bool dp_netdev_execute_actions(struct dp_netdev *,
                                       struct ofpbuf *, struct flow *,
                                       const struct nlattr *actions,
-                                      size_t actions_len);
+                                      size_t actions_len,
+                                      uint32_t *skb_mark);
+static void dp_netdev_port_input(struct dp_netdev *dp,
+                                 struct dp_netdev_port *port,
+                                 struct ofpbuf *packet);
 
 static struct dpif_netdev *
 dpif_netdev_cast(const struct dpif *dpif)
@@ -940,8 +944,22 @@ dpif_netdev_execute(struct dpif *dpif, const struct dpif_execute *execute)
     error = dpif_netdev_flow_from_nlattrs(execute->key, execute->key_len,
                                           &key);
     if (!error) {
-        dp_netdev_execute_actions(dp, &copy, &key,
-                                  execute->actions, execute->actions_len);
+        bool recirculate;
+        uint32_t skb_mark = 0;
+
+        recirculate = dp_netdev_execute_actions(dp, &copy, &key,
+                                                execute->actions,
+                                                execute->actions_len,
+                                                &skb_mark);
+        if (recirculate) {
+            struct dp_netdev_port *port;
+            port = (key.in_port < MAX_PORTS) ? dp->ports[key.in_port] : NULL;
+            if (port) {
+                dp_netdev_port_input(dp, port, &copy);
+                return 0;
+            }
+            error = ENOENT;
+        }
     }
 
     ofpbuf_uninit(&copy);
@@ -1028,23 +1046,32 @@ static void
 dp_netdev_port_input(struct dp_netdev *dp, struct dp_netdev_port *port,
                      struct ofpbuf *packet)
 {
-    struct dp_netdev_flow *flow;
-    struct flow key;
+    bool recirculate;
+    uint32_t skb_mark = 0;
+    int limit = MAX_RECIRCULATION_DEPTH;
 
-    if (packet->size < ETH_HEADER_LEN) {
-        return;
-    }
-    flow_extract(packet, 0, 0, NULL, port->port_no, &key);
-    flow = dp_netdev_lookup_flow(dp, &key);
-    if (flow) {
-        dp_netdev_flow_used(flow, packet);
-        dp_netdev_execute_actions(dp, packet, &key,
-                                  flow->actions, flow->actions_len);
-        dp->n_hit++;
-    } else {
-        dp->n_missed++;
-        dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL);
-    }
+    do {
+        struct dp_netdev_flow *flow;
+        struct flow key;
+
+        if (packet->size < ETH_HEADER_LEN) {
+            return;
+        }
+        flow_extract(packet, 0, skb_mark, NULL, port->port_no, &key);
+        flow = dp_netdev_lookup_flow(dp, &key);
+        if (flow) {
+            dp_netdev_flow_used(flow, packet);
+            recirculate = dp_netdev_execute_actions(dp, packet, &key,
+                                                    flow->actions,
+                                                    flow->actions_len,
+                                                    &skb_mark);
+            dp->n_hit++;
+        } else {
+            dp->n_missed++;
+            dp_netdev_output_userspace(dp, packet, DPIF_UC_MISS, &key, NULL);
+            recirculate = false;
+        }
+    } while (recirculate && limit--);
 }
 
 static void
@@ -1163,6 +1190,7 @@ dp_netdev_sample(struct dp_netdev *dp,
     const struct nlattr *subactions = NULL;
     const struct nlattr *a;
     size_t left;
+    uint32_t skb_mark;
 
     NL_NESTED_FOR_EACH_UNSAFE (a, left, action) {
         int type = nl_attr_type(a);
@@ -1186,7 +1214,7 @@ dp_netdev_sample(struct dp_netdev *dp,
     }
 
     dp_netdev_execute_actions(dp, packet, key, nl_attr_get(subactions),
-                              nl_attr_get_size(subactions));
+                              nl_attr_get_size(subactions), &skb_mark);
 }
 
 static void
@@ -1201,7 +1229,8 @@ dp_netdev_action_userspace(struct dp_netdev *dp,
 }
 
 static void
-execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
+execute_set_action(struct ofpbuf *packet, const struct nlattr *a,
+                   uint32_t *skb_mark)
 {
     enum ovs_key_attr type = nl_attr_type(a);
     const struct ovs_key_ipv4 *ipv4_key;
@@ -1211,11 +1240,14 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
 
     switch (type) {
     case OVS_KEY_ATTR_PRIORITY:
-    case OVS_KEY_ATTR_SKB_MARK:
     case OVS_KEY_ATTR_TUNNEL:
         /* not implemented */
         break;
 
+    case OVS_KEY_ATTR_SKB_MARK:
+        *skb_mark = nl_attr_get_u32(a);
+        break;
+
     case OVS_KEY_ATTR_ETHERNET:
         dp_netdev_set_dl(packet,
                    nl_attr_get_unspec(a, sizeof(struct ovs_key_ethernet)));
@@ -1263,11 +1295,11 @@ execute_set_action(struct ofpbuf *packet, const struct nlattr *a)
     }
 }
 
-static void
+static bool
 dp_netdev_execute_actions(struct dp_netdev *dp,
                           struct ofpbuf *packet, struct flow *key,
                           const struct nlattr *actions,
-                          size_t actions_len)
+                          size_t actions_len, uint32_t *skb_mark)
 {
     const struct nlattr *a;
     unsigned int left;
@@ -1305,18 +1337,23 @@ dp_netdev_execute_actions(struct dp_netdev *dp,
             break;
 
         case OVS_ACTION_ATTR_SET:
-            execute_set_action(packet, nl_attr_get(a));
+            execute_set_action(packet, nl_attr_get(a), skb_mark);
             break;
 
         case OVS_ACTION_ATTR_SAMPLE:
             dp_netdev_sample(dp, packet, key, a);
             break;
 
+        case OVS_ACTION_ATTR_RECIRCULATE:
+            return true;
+
         case OVS_ACTION_ATTR_UNSPEC:
         case __OVS_ACTION_ATTR_MAX:
             NOT_REACHED();
         }
     }
+
+    return false;
 }
 
 const struct dpif_class dpif_netdev_class = {
diff --git a/lib/flow.h b/lib/flow.h
index 6e169d6..66f89e3 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -296,4 +296,7 @@ uint32_t minimask_hash(const struct minimask *, uint32_t basis);
 bool minimask_has_extra(const struct minimask *, const struct minimask *);
 bool minimask_is_catchall(const struct minimask *);
 
+#define MAX_RECIRCULATION_DEPTH 4   /* Completely arbitrary value to
+                                     * guard against infinite loops */
+
 #endif /* flow.h */
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 3206dc9..e18e109 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -75,6 +75,7 @@ odp_action_len(uint16_t type)
     case OVS_ACTION_ATTR_POP_VLAN: return 0;
     case OVS_ACTION_ATTR_PUSH_MPLS: return sizeof(struct ovs_action_push_mpls);
     case OVS_ACTION_ATTR_POP_MPLS: return sizeof(ovs_be16);
+    case OVS_ACTION_ATTR_RECIRCULATE: return 0;
     case OVS_ACTION_ATTR_SET: return -2;
     case OVS_ACTION_ATTR_SAMPLE: return -2;
 
@@ -376,6 +377,10 @@ format_odp_action(struct ds *ds, const struct nlattr *a)
         ds_put_format(ds, "pop_mpls(eth_type=0x%"PRIx16")", ntohs(ethertype));
         break;
     }
+    case OVS_ACTION_ATTR_RECIRCULATE: {
+        ds_put_format(ds, "recirculate");
+        break;
+    }
     case OVS_ACTION_ATTR_SAMPLE:
         format_odp_sample_action(ds, a);
         break;
@@ -2172,6 +2177,12 @@ commit_odp_tunnel_action(const struct flow *flow, struct flow *base,
     }
 }
 
+void
+commit_odp_recirculate_action(struct ofpbuf *odp_actions)
+{
+    nl_msg_put_flag(odp_actions, OVS_ACTION_ATTR_RECIRCULATE);
+}
+
 static void
 commit_set_ether_addr_action(const struct flow *flow, struct flow *base,
                              struct ofpbuf *odp_actions)
@@ -2385,14 +2396,14 @@ commit_set_skb_mark_action(const struct flow *flow, struct flow *base,
         return;
     }
     base->skb_mark = flow->skb_mark;
-
     odp_put_skb_mark_action(base->skb_mark, odp_actions);
 }
 /* If any of the flow key data that ODP actions can modify are different in
  * 'base' and 'flow', appends ODP actions to 'odp_actions' that change the flow
  * key from 'base' into 'flow', and then changes 'base' the same way.  Does not
  * commit set_tunnel actions.  Users should call commit_odp_tunnel_action()
- * in addition to this function if needed. */
+ * and commit_odp_recirculate_action() in addition to this function if
+ * needed. */
 void
 commit_odp_actions(const struct flow *flow, struct flow *base,
                    struct ofpbuf *odp_actions)
diff --git a/lib/odp-util.h b/lib/odp-util.h
index ad0fb30..da62aa5 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -115,6 +115,7 @@ const char *odp_key_fitness_to_string(enum odp_key_fitness);
 
 void commit_odp_tunnel_action(const struct flow *, struct flow *base,
                               struct ofpbuf *odp_actions);
+void commit_odp_recirculate_action(struct ofpbuf *odp_actions);
 void commit_odp_actions(const struct flow *, struct flow *base,
                         struct ofpbuf *odp_actions);
 \f
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 47830c1..5129da1 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -119,7 +119,8 @@ static struct rule_dpif *rule_dpif_miss_rule(struct ofproto_dpif *ofproto,
 
 static void rule_credit_stats(struct rule_dpif *,
                               const struct dpif_flow_stats *);
-static void flow_push_stats(struct facet *, const struct dpif_flow_stats *);
+static void flow_push_stats(struct facet *, const struct dpif_flow_stats *,
+                            const struct ofpact *, size_t ofpacts_len);
 static tag_type rule_calculate_tag(const struct flow *,
                                    const struct minimask *, uint32_t basis);
 static void rule_invalidate(const struct rule_dpif *);
@@ -276,6 +277,17 @@ struct action_xlate_ctx {
     uint16_t nf_output_iface;   /* Output interface index for NetFlow. */
     mirror_mask_t mirrors;      /* Bitmap of associated mirrors. */
 
+    size_t ofpacts_len;         /* The number of bytes of the ofpacts
+                                 * argument to xlate_actions() processed
+                                 * by it. This is used to calculate an
+                                 * offset into ofpacts for calls to
+                                 * xlate_actions on recirculated packets */
+
+    uint32_t recirculation_id;  /* skb_mark to use to identify
+                                 * recirculation. */
+    bool recircualted;          /* True if the context does not add a
+                                 * recirculate action. False otherwise. */
+
 /* xlate_actions() initializes and uses these members, but the client has no
  * reason to look at them. */
 
@@ -312,7 +324,8 @@ static void action_xlate_ctx_init(struct action_xlate_ctx *,
                                   struct ofproto_dpif *, const struct flow *,
                                   const struct initial_vals *initial_vals,
                                   struct rule_dpif *,
-                                  uint8_t tcp_flags, const struct ofpbuf *);
+                                  uint8_t tcp_flags, const struct ofpbuf *,
+                                  uint32_t recirculation_id);
 static void xlate_actions(struct action_xlate_ctx *,
                           const struct ofpact *ofpacts, size_t ofpacts_len,
                           struct ofpbuf *odp_actions);
@@ -494,13 +507,40 @@ struct facet {
     struct subfacet one_subfacet;
 
     long long int learn_rl;      /* Rate limiter for facet_learn(). */
+
+    const struct ofpact *ofpacts;   /* ofpacts for this facet.
+                                     * Will differ from rule->up.ofpacts
+                                     * if facet is for a recirculated packet. */
+    size_t ofpacts_len;             /* ofpacts_len for this facet
+                                     * Will differ from rule->up.ofpacts_len
+                                     * if facet is for a recirculated packet. */
+
+    uint32_t recirculation_id;       /* Recirculation id.
+                                      * Non-zero for a facet
+                                      * that recirculates packets;
+                                      * used as the value of flow.skb_mark
+                                      * in the facet of recirculated packets.
+                                      * Zero otherwise. */
+    struct hmap_node recirculation_id_hmap_node;
+                                    /* In owning ofproto's 'recirculation_id'
+                                     * hmap. */
+    const struct ofpact *recirculation_ofpacts;
+                                    /* ofpacts for facets of packets
+                                     * recirculated by this facet */
+    size_t recirculation_ofpacts_len;
+                                    /* ofpacts_len for facets of packets
+                                     * recirculated by this facet */
+
+    bool recirculated;              /* Facet of a recirculated packet? */
 };
 
-static struct facet *facet_create(struct rule_dpif *,
-                                  const struct flow *, uint32_t hash);
+static struct facet *facet_create(struct rule_dpif *, const struct flow *,
+                                  const struct ofpact *, size_t ofpacts_len,
+                                  bool recirculated, uint32_t hash);
 static void facet_remove(struct facet *);
 static void facet_free(struct facet *);
 
+static struct facet *facet_find_by_id(struct ofproto_dpif *, uint32_t id);
 static struct facet *facet_find(struct ofproto_dpif *,
                                 const struct flow *, uint32_t hash);
 static struct facet *facet_lookup_valid(struct ofproto_dpif *,
@@ -703,6 +743,7 @@ struct ofproto_dpif {
 
     /* Facets. */
     struct hmap facets;
+    struct hmap recirculation_ids;
     struct hmap subfacets;
     struct governor *governor;
     long long int consistency_rl;
@@ -1358,6 +1399,7 @@ construct(struct ofproto *ofproto_)
     ofproto->has_bonded_bundles = false;
 
     hmap_init(&ofproto->facets);
+    hmap_init(&ofproto->recirculation_ids);
     hmap_init(&ofproto->subfacets);
     ofproto->governor = NULL;
     ofproto->consistency_rl = LLONG_MIN;
@@ -3408,6 +3450,31 @@ port_is_lacp_current(const struct ofport *ofport_)
             : -1);
 }
 \f
+/* Recirculation Id */
+#define RECIRCULATION_ID_NONE  0
+#define RECIRCULATION_ID_DUMMY 2
+#define RECIRCULATION_ID_MIN   RECIRCULATION_ID_DUMMY
+
+static uint32_t recirculation_id_hash(uint32_t id)
+{
+    return hash_words(&id, 1, 0);
+}
+
+/* XXX: This does not prevent id collision */
+static uint32_t get_recirculation_id(void)
+{
+    static uint32_t id = RECIRCULATION_ID_MIN;
+
+    if (id < RECIRCULATION_ID_MIN)
+        id = RECIRCULATION_ID_MIN;
+    /* Skip the IPSEC_MARK bit; it is reserved. */
+    if (id & IPSEC_MARK) {
+        id++;
+        ovs_assert(!(id & IPSEC_MARK));
+    }
+    return id++;
+}
+\f
 /* Upcall handling. */
 
 /* Flow miss batching.
@@ -3565,6 +3632,15 @@ static bool
 flow_miss_should_make_facet(struct ofproto_dpif *ofproto,
                             struct flow_miss *miss, uint32_t hash)
 {
+    /* A facet is currently required to handle recirculation.
+     * There currently isn't a good way to detect if recirculation will
+     * occur or not. So in the meantime assume that it can't occur
+     * for non-MPLS packets and it may occur for MPLS packets.
+     */
+    if (eth_type_mpls(miss->flow.dl_type)) {
+        return true;
+    }
+
     if (!ofproto->governor) {
         size_t n_subfacets;
 
@@ -3584,8 +3660,8 @@ flow_miss_should_make_facet(struct ofproto_dpif *ofproto,
  * or creating any datapath flow.  May add an "execute" operation to 'ops' and
  * increment '*n_ops'. */
 static void
-handle_flow_miss_without_facet(struct flow_miss *miss,
-                               struct rule_dpif *rule,
+handle_flow_miss_without_facet(struct flow_miss *miss, struct rule_dpif *rule,
+                               const struct ofpact *ofpacts, size_t ofpacts_len,
                                struct flow_miss_op *ops, size_t *n_ops)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
@@ -3606,10 +3682,10 @@ handle_flow_miss_without_facet(struct flow_miss *miss,
         rule_credit_stats(rule, &stats);
 
         action_xlate_ctx_init(&ctx, ofproto, &miss->flow,
-                              &miss->initial_vals, rule, 0, packet);
+                              &miss->initial_vals, rule, 0, packet,
+                              RECIRCULATION_ID_DUMMY);
         ctx.resubmit_stats = &stats;
-        xlate_actions(&ctx, rule->up.ofpacts, rule->up.ofpacts_len,
-                      &odp_actions);
+        xlate_actions(&ctx, ofpacts, ofpacts_len, &odp_actions);
 
         if (odp_actions.size) {
             struct dpif_execute *execute = &op->dpif_op.u.execute;
@@ -3723,14 +3799,30 @@ handle_flow_miss(struct flow_miss *miss, struct flow_miss_op *ops,
 
     facet = facet_lookup_valid(ofproto, &miss->flow, hash);
     if (!facet) {
-        struct rule_dpif *rule = rule_dpif_lookup(ofproto, &miss->flow);
+        struct rule_dpif *rule;
+        const struct ofpact *ofpacts;
+        size_t ofpacts_len;
+        struct facet *parent_facet;
+
+        parent_facet = facet_find_by_id(ofproto, miss->flow.skb_mark);
+        if (parent_facet) {
+            rule = parent_facet->rule;
+            ofpacts = parent_facet->recirculation_ofpacts;
+            ofpacts_len = parent_facet->recirculation_ofpacts_len;
+        } else {
+            rule = rule_dpif_lookup(ofproto, &miss->flow);
+            ofpacts = rule->up.ofpacts;
+            ofpacts_len = rule->up.ofpacts_len;
+        }
 
         if (!flow_miss_should_make_facet(ofproto, miss, hash)) {
-            handle_flow_miss_without_facet(miss, rule, ops, n_ops);
+            handle_flow_miss_without_facet(miss, rule, ofpacts,
+                                           ofpacts_len, ops, n_ops);
             return;
         }
 
-        facet = facet_create(rule, &miss->flow, hash);
+        facet = facet_create(rule, &miss->flow, ofpacts, ofpacts_len,
+                             parent_facet != NULL, hash);
         now = facet->used;
     } else {
         now = time_msec();
@@ -4494,7 +4586,9 @@ rule_expire(struct rule_dpif *rule)
  * The facet will initially have no subfacets.  The caller should create (at
  * least) one subfacet with subfacet_create(). */
 static struct facet *
-facet_create(struct rule_dpif *rule, const struct flow *flow, uint32_t hash)
+facet_create(struct rule_dpif *rule, const struct flow *flow,
+             const struct ofpact *ofpacts, size_t ofpacts_len,
+             bool recirculated, uint32_t hash)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
     struct facet *facet;
@@ -4502,9 +4596,13 @@ facet_create(struct rule_dpif *rule, const struct flow *flow, uint32_t hash)
     facet = xzalloc(sizeof *facet);
     facet->used = time_msec();
     hmap_insert(&ofproto->facets, &facet->hmap_node, hash);
+    hmap_node_nullify(&facet->recirculation_id_hmap_node);
     list_push_back(&rule->facets, &facet->list_node);
     facet->rule = rule;
     facet->flow = *flow;
+    facet->ofpacts = ofpacts;
+    facet->ofpacts_len = ofpacts_len;
+    facet->recirculated = recirculated;
     list_init(&facet->subfacets);
     netflow_flow_init(&facet->nf_flow);
     netflow_flow_update_time(ofproto->netflow, &facet->nf_flow, facet->used);
@@ -4574,6 +4672,10 @@ facet_remove(struct facet *facet)
     }
     hmap_remove(&ofproto->facets, &facet->hmap_node);
     list_remove(&facet->list_node);
+    if (!hmap_node_is_null(&facet->recirculation_id_hmap_node)) {
+        hmap_remove(&ofproto->recirculation_ids,
+                    &facet->recirculation_id_hmap_node);
+    }
     facet_free(facet);
 }
 
@@ -4603,10 +4705,10 @@ facet_learn(struct facet *facet)
 
     action_xlate_ctx_init(&ctx, ofproto, &facet->flow,
                           &subfacet->initial_vals,
-                          facet->rule, facet->tcp_flags, NULL);
+                          facet->rule, facet->tcp_flags, NULL,
+                          facet->recirculation_id);
     ctx.may_learn = true;
-    xlate_actions_for_side_effects(&ctx, facet->rule->up.ofpacts,
-                                   facet->rule->up.ofpacts_len);
+    xlate_actions_for_side_effects(&ctx, facet->ofpacts, facet->ofpacts_len);
 }
 
 static void
@@ -4742,6 +4844,36 @@ facet_find(struct ofproto_dpif *ofproto,
     return NULL;
 }
 
+/* Searches 'ofproto''s table of facets with recirculation ids
+ * for a facet whose recirculation_id is 'id'.
+ * Returns it if found, otherwise a null pointer.
+ *
+ * The returned facet might need revalidation; use facet_lookup_valid()
+ * instead if that is important. */
+static struct facet *
+facet_find_by_id(struct ofproto_dpif *ofproto, uint32_t id)
+{
+    uint32_t hash = recirculation_id_hash(id);
+    struct facet *facet;
+
+    /* some values are never used */
+    if (id == RECIRCULATION_ID_NONE || (id & IPSEC_MARK)) {
+        return NULL;
+    }
+
+    /* This is a ridiculous way to look things up, most likely the id
+     * should be cooked somehow to allow a more efficient lookup.
+     */
+    HMAP_FOR_EACH_WITH_HASH (facet, recirculation_id_hmap_node,
+                             hash, &ofproto->recirculation_ids) {
+        if (facet->recirculation_id == id) {
+            return facet;
+        }
+    }
+
+    return NULL;
+}
+
 /* Searches 'ofproto''s table of facets for one exactly equal to 'flow'.
  * Returns it if found, otherwise a null pointer.
  *
@@ -4814,8 +4946,10 @@ subfacet_should_install(struct subfacet *subfacet, enum slow_path_reason slow,
                               subfacet->actions_len))));
 }
 
-static bool
-facet_check_consistency(struct facet *facet)
+static size_t
+facet_check_actions_consistency(struct facet *facet, struct rule_dpif *rule,
+                                const struct ofpact *ofpacts,
+                                size_t ofpacts_len)
 {
     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
 
@@ -4824,33 +4958,10 @@ facet_check_consistency(struct facet *facet)
     uint64_t odp_actions_stub[1024 / 8];
     struct ofpbuf odp_actions;
 
-    struct rule_dpif *rule;
     struct subfacet *subfacet;
     bool may_log = false;
-    bool ok;
-
-    /* Check the rule for consistency. */
-    rule = rule_dpif_lookup(ofproto, &facet->flow);
-    ok = rule == facet->rule;
-    if (!ok) {
-        may_log = !VLOG_DROP_WARN(&rl);
-        if (may_log) {
-            struct ds s;
-
-            ds_init(&s);
-            flow_format(&s, &facet->flow);
-            ds_put_format(&s, ": facet associated with wrong rule (was "
-                          "table=%"PRIu8",", facet->rule->up.table_id);
-            cls_rule_format(&facet->rule->up.cr, &s);
-            ds_put_format(&s, ") (should have been table=%"PRIu8",",
-                          rule->up.table_id);
-            cls_rule_format(&rule->up.cr, &s);
-            ds_put_char(&s, ')');
-
-            VLOG_WARN("%s", ds_cstr(&s));
-            ds_destroy(&s);
-        }
-    }
+    bool ok = true;
+    size_t ofpacts_consumed_len = 0;
 
     /* Check the datapath actions for consistency. */
     ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
@@ -4860,9 +4971,10 @@ facet_check_consistency(struct facet *facet)
         struct ds s;
 
         action_xlate_ctx_init(&ctx, ofproto, &facet->flow,
-                              &subfacet->initial_vals, rule, 0, NULL);
-        xlate_actions(&ctx, rule->up.ofpacts, rule->up.ofpacts_len,
-                      &odp_actions);
+                              &subfacet->initial_vals, rule, 0, NULL,
+                              facet->recirculation_id);
+        xlate_actions(&ctx, ofpacts, ofpacts_len, &odp_actions);
+        ofpacts_consumed_len = ctx.ofpacts_len;
 
         if (subfacet->path == SF_NOT_INSTALLED) {
             /* This only happens if the datapath reported an error when we
@@ -4921,6 +5033,84 @@ facet_check_consistency(struct facet *facet)
     }
     ofpbuf_uninit(&odp_actions);
 
+    return ok ? ofpacts_consumed_len : 0;
+}
+
+static bool
+facet_check_consistency(struct facet *facet)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
+
+    struct ofproto_dpif *ofproto = ofproto_dpif_cast(facet->rule->up.ofproto);
+
+    const struct ofpact *ofpacts;
+    size_t ofpacts_len;
+
+    struct rule_dpif *rule;
+    struct facet *chain[MAX_RECIRCULATION_DEPTH + 1];
+    int top;
+    bool may_log = false;
+    bool ok;
+
+    top = 0;
+    chain[0] = facet;
+
+    while (chain[top]->recirculated && top <= MAX_RECIRCULATION_DEPTH) {
+        chain[top + 1] = facet_find_by_id(ofproto, chain[top]->flow.skb_mark);
+        if (!chain[top + 1])  {
+            may_log = !VLOG_DROP_WARN(&rl);
+            if (may_log) {
+                struct ds s;
+
+                ds_init(&s);
+                flow_format(&s, &chain[top]->flow);
+                ds_put_format(&s, ": parent facet of facet for "
+                              "recirculated packets could not be found");
+
+                VLOG_WARN("%s", ds_cstr(&s));
+                ds_destroy(&s);
+            }
+            break;
+        }
+        top++;
+    }
+
+    rule = rule_dpif_lookup(ofproto, &chain[top]->flow);
+    ok = rule == chain[top]->rule;
+    if (!ok) {
+        may_log = !VLOG_DROP_WARN(&rl);
+        if (may_log) {
+            struct ds s;
+
+            ds_init(&s);
+            flow_format(&s, &chain[top]->flow);
+            ds_put_format(&s, ": facet associated with wrong rule (was "
+                          "table=%"PRIu8",", chain[top]->rule->up.table_id);
+            cls_rule_format(&chain[top]->rule->up.cr, &s);
+            ds_put_format(&s, ") (should have been table=%"PRIu8",",
+                          rule->up.table_id);
+            cls_rule_format(&rule->up.cr, &s);
+            ds_put_char(&s, ')');
+
+            VLOG_WARN("%s", ds_cstr(&s));
+            ds_destroy(&s);
+        }
+    }
+
+    ofpacts = rule->up.ofpacts;
+    ofpacts_len = rule->up.ofpacts_len;
+    do {
+        size_t consumed;
+        consumed = facet_check_actions_consistency(chain[top], rule,
+                                                   ofpacts, ofpacts_len);
+        if (!consumed) {
+            ok = false;
+            break;
+        }
+        ofpacts = ofpact_end(ofpacts, consumed);
+        ofpacts_len -= consumed;
+    } while(top--);
+
     return ok;
 }
 
@@ -4991,7 +5181,8 @@ facet_revalidate(struct facet *facet)
         enum slow_path_reason slow;
 
         action_xlate_ctx_init(&ctx, ofproto, &facet->flow,
-                              &subfacet->initial_vals, new_rule, 0, NULL);
+                              &subfacet->initial_vals, new_rule, 0, NULL,
+                              facet->recirculation_id);
         xlate_actions(&ctx, new_rule->up.ofpacts, new_rule->up.ofpacts_len,
                       &odp_actions);
 
@@ -5089,11 +5280,13 @@ facet_push_stats(struct facet *facet)
     stats.tcp_flags = 0;
 
     if (stats.n_packets || stats.n_bytes || facet->used > facet->prev_used) {
+
         facet->prev_packet_count = facet->packet_count;
         facet->prev_byte_count = facet->byte_count;
         facet->prev_used = facet->used;
 
-        flow_push_stats(facet, &stats);
+        flow_push_stats(facet, &stats,
+                        facet->ofpacts, facet->ofpacts_len);
 
         update_mirror_stats(ofproto_dpif_cast(facet->rule->up.ofproto),
                             facet->mirrors, stats.n_packets, stats.n_bytes);
@@ -5133,7 +5326,8 @@ rule_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats)
 /* Pushes flow statistics to the rules which 'facet->flow' resubmits
  * into given 'facet->rule''s actions and mirrors. */
 static void
-flow_push_stats(struct facet *facet, const struct dpif_flow_stats *stats)
+flow_push_stats(struct facet *facet, const struct dpif_flow_stats *stats,
+                const struct ofpact *ofpacts, size_t ofpacts_len)
 {
     struct rule_dpif *rule = facet->rule;
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
@@ -5143,10 +5337,11 @@ flow_push_stats(struct facet *facet, const struct dpif_flow_stats *stats)
     ofproto_rule_update_used(&rule->up, stats->used);
 
     action_xlate_ctx_init(&ctx, ofproto, &facet->flow,
-                          &subfacet->initial_vals, rule, 0, NULL);
+                          &subfacet->initial_vals, rule, 0, NULL,
+                          facet->recirculation_id);
     ctx.resubmit_stats = stats;
-    xlate_actions_for_side_effects(&ctx, rule->up.ofpacts,
-                                   rule->up.ofpacts_len);
+
+    xlate_actions_for_side_effects(&ctx, ofpacts, ofpacts_len);
 }
 \f
 /* Subfacets. */
@@ -5306,8 +5501,19 @@ subfacet_make_actions(struct subfacet *subfacet, const struct ofpbuf *packet,
     struct action_xlate_ctx ctx;
 
     action_xlate_ctx_init(&ctx, ofproto, &facet->flow,
-                          &subfacet->initial_vals, rule, 0, packet);
-    xlate_actions(&ctx, rule->up.ofpacts, rule->up.ofpacts_len, odp_actions);
+                          &subfacet->initial_vals, rule, 0, packet,
+                          facet->recirculation_id);
+    xlate_actions(&ctx, facet->ofpacts, facet->ofpacts_len, odp_actions);
+    if (ctx.recircualted) {
+        facet->recirculation_id = ctx.recirculation_id;
+        facet->recirculation_ofpacts = ofpact_end(facet->ofpacts,
+                                                  ctx.ofpacts_len);
+        facet->recirculation_ofpacts_len =
+                        facet->ofpacts_len - ctx.ofpacts_len;
+        hmap_insert(&ofproto->recirculation_ids,
+                    &facet->recirculation_id_hmap_node,
+                    recirculation_id_hash(facet->recirculation_id));
+    }
     facet->tags = ctx.tags;
     facet->has_learn = ctx.has_learn;
     facet->has_normal = ctx.has_normal;
@@ -5638,7 +5844,8 @@ rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
     initial_vals.tunnel_ip_tos = flow->tunnel.ip_tos;
     ofpbuf_use_stub(&odp_actions, odp_actions_stub, sizeof odp_actions_stub);
     action_xlate_ctx_init(&ctx, ofproto, flow, &initial_vals,
-                          rule, stats.tcp_flags, packet);
+                          rule, stats.tcp_flags, packet,
+                          RECIRCULATION_ID_DUMMY);
     ctx.resubmit_stats = &stats;
     xlate_actions(&ctx, rule->up.ofpacts, rule->up.ofpacts_len, &odp_actions);
 
@@ -6320,6 +6527,16 @@ execute_dec_mpls_ttl_action(struct action_xlate_ctx *ctx)
 }
 
 static void
+execute_recircualte_action(struct action_xlate_ctx *ctx)
+{
+    if (ctx->recirculation_id == RECIRCULATION_ID_NONE) {
+        ctx->recirculation_id = get_recirculation_id();
+    }
+    ctx->recircualted = true;
+    ctx->flow.skb_mark = ctx->recirculation_id;
+}
+
+static void
 xlate_output_action(struct action_xlate_ctx *ctx,
                     uint16_t port, uint16_t max_len, bool may_packet_in)
 {
@@ -6560,6 +6777,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
                  struct action_xlate_ctx *ctx)
 {
     bool was_evictable = true;
+    bool may_recirculate = false;
     const struct ofpact *a;
 
     if (ctx->rule) {
@@ -6628,18 +6846,30 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
 
         case OFPACT_SET_IPV4_SRC:
             if (ctx->flow.dl_type == htons(ETH_TYPE_IP)) {
+                if (may_recirculate) {
+                    execute_recircualte_action(ctx);
+                    goto out;
+                }
                 ctx->flow.nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
             }
             break;
 
         case OFPACT_SET_IPV4_DST:
             if (ctx->flow.dl_type == htons(ETH_TYPE_IP)) {
+                if (may_recirculate) {
+                    execute_recircualte_action(ctx);
+                    goto out;
+                }
                 ctx->flow.nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
             }
             break;
 
         case OFPACT_SET_IPV4_DSCP:
             /* OpenFlow 1.0 only supports IPv4. */
+            if (may_recirculate) {
+                execute_recircualte_action(ctx);
+                goto out;
+            }
             if (ctx->flow.dl_type == htons(ETH_TYPE_IP)) {
                 ctx->flow.nw_tos &= ~IP_DSCP_MASK;
                 ctx->flow.nw_tos |= ofpact_get_SET_IPV4_DSCP(a)->dscp;
@@ -6648,12 +6878,20 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
 
         case OFPACT_SET_L4_SRC_PORT:
             if (is_ip_any(&ctx->flow)) {
+                if (may_recirculate) {
+                    execute_recircualte_action(ctx);
+                    goto out;
+                }
                 ctx->flow.tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
             }
             break;
 
         case OFPACT_SET_L4_DST_PORT:
             if (is_ip_any(&ctx->flow)) {
+                if (may_recirculate) {
+                    execute_recircualte_action(ctx);
+                    goto out;
+                }
                 ctx->flow.tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
             }
             break;
@@ -6694,10 +6932,15 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
 
         case OFPACT_PUSH_MPLS:
             execute_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a)->ethertype);
+            may_recirculate = false;
             break;
 
         case OFPACT_POP_MPLS:
             execute_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
+            if (ctx->flow.dl_type == htons(ETH_TYPE_IP) ||
+                ctx->flow.dl_type == htons(ETH_TYPE_IPV6)) {
+                may_recirculate = true;
+            }
             break;
 
         case OFPACT_SET_MPLS_TTL:
@@ -6713,7 +6956,10 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
             break;
 
         case OFPACT_DEC_TTL:
-            if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
+            if (may_recirculate) {
+                execute_recircualte_action(ctx);
+                goto out;
+            } else if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
                 goto out;
             }
             break;
@@ -6800,6 +7046,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
     }
 
 out:
+    ctx->ofpacts_len = (char *)(a) - (char *)ofpacts;
     if (ctx->rule) {
         ctx->rule->up.evictable = was_evictable;
     }
@@ -6810,7 +7057,8 @@ action_xlate_ctx_init(struct action_xlate_ctx *ctx,
                       struct ofproto_dpif *ofproto, const struct flow *flow,
                       const struct initial_vals *initial_vals,
                       struct rule_dpif *rule,
-                      uint8_t tcp_flags, const struct ofpbuf *packet)
+                      uint8_t tcp_flags, const struct ofpbuf *packet,
+                      uint32_t recirculation_id)
 {
     ovs_be64 initial_tun_id = flow->tunnel.tun_id;
 
@@ -6833,7 +7081,13 @@ action_xlate_ctx_init(struct action_xlate_ctx *ctx,
      *   registers.
      * - Tunnel 'base_flow' is completely cleared since that is what the
      *   kernel does.  If we wish to maintain the original values an action
-     *   needs to be generated. */
+     *   needs to be generated.
+     * - The recirculation_id element of flow and base flow are set to
+     *   recirculation_id, which is the id that will be used by a
+     *   recirculation action if one is added. It is stored in flow and
+     *   base_flow for convenience as the recirculation_id element of flow and
+     *   base flow are otherwise unused by action_xlate_ctx_init().
+     */
 
     ctx->ofproto = ofproto;
     ctx->flow = *flow;
@@ -6849,6 +7103,7 @@ action_xlate_ctx_init(struct action_xlate_ctx *ctx,
     ctx->resubmit_hook = NULL;
     ctx->report_hook = NULL;
     ctx->resubmit_stats = NULL;
+    ctx->recirculation_id = recirculation_id;
 }
 
 /* Translates the 'ofpacts_len' bytes of "struct ofpacts" starting at 'ofpacts'
@@ -6885,6 +7140,7 @@ xlate_actions(struct action_xlate_ctx *ctx,
     ctx->orig_skb_priority = ctx->flow.skb_priority;
     ctx->table_id = 0;
     ctx->exit = false;
+    ctx->recircualted = false;
 
     ofpbuf_use_stub(&ctx->stack, ctx->init_stack, sizeof ctx->init_stack);
 
@@ -6933,6 +7189,11 @@ xlate_actions(struct action_xlate_ctx *ctx,
 
         if (tunnel_ecn_ok(ctx) && (!in_port || may_receive(in_port, ctx))) {
             do_xlate_actions(ofpacts, ofpacts_len, ctx);
+            if (ctx->recircualted) {
+                commit_odp_actions(&ctx->flow, &ctx->base_flow,
+                                   ctx->odp_actions);
+                commit_odp_recirculate_action(odp_actions);
+            }
 
             /* We've let OFPP_NORMAL and the learning action look at the
              * packet, so drop it now if forwarding is disabled. */
@@ -7692,7 +7953,8 @@ packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
     initial_vals.vlan_tci = flow->vlan_tci;
     initial_vals.tunnel_ip_tos = 0;
     action_xlate_ctx_init(&ctx, ofproto, flow, &initial_vals, NULL,
-                          packet_get_tcp_flags(packet, flow), packet);
+                          packet_get_tcp_flags(packet, flow), packet,
+                          RECIRCULATION_ID_DUMMY);
     ctx.resubmit_stats = &stats;
 
     ofpbuf_use_stub(&odp_actions,
@@ -8077,7 +8339,7 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
         ofpbuf_use_stub(&odp_actions,
                         odp_actions_stub, sizeof odp_actions_stub);
         action_xlate_ctx_init(&trace.ctx, ofproto, flow, initial_vals,
-                              rule, tcp_flags, packet);
+                              rule, tcp_flags, packet, RECIRCULATION_ID_DUMMY);
         trace.ctx.resubmit_hook = trace_resubmit;
         trace.ctx.report_hook = trace_report;
         xlate_actions(&trace.ctx, rule->up.ofpacts, rule->up.ofpacts_len,
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 2/4] Move execute_set_action to lib/odp-util.c
From: Simon Horman @ 2013-04-08  6:43 UTC (permalink / raw)
  To: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: Isaku Yamahata, Ravi K
In-Reply-To: <1365403431-18102-1-git-send-email-horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>

Move execute_set_action from lib/dpif-netdev.c to lib/odp-util.c

This is in preparation for using execute_set_action()
in lib/odp-util.c to handle recirculation.

Signed-off-by: Simon Horman <horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>

---

packet.c might be a better place for execute_set_action()
but I'm unsure if accessing struct ovs_key_ethernet would
lead to a layering violation.

This patch depends on the patch "Add packet recirculation"

v5
* No change

rfc4
* make use of skb_mark

rfc2 - rfc3
* omitted

rfc1
* Initial post

Conflicts:
	lib/dpif-netdev.c
---
 lib/dpif-netdev.c |   76 -----------------------------------------------------
 lib/odp-util.c    |   76 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/odp-util.h    |    3 +++
 3 files changed, 79 insertions(+), 76 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 31255f6..e698e1e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1115,15 +1115,6 @@ dpif_netdev_wait(struct dpif *dpif)
 }
 
 static void
-dp_netdev_set_dl(struct ofpbuf *packet, const struct ovs_key_ethernet *eth_key)
-{
-    struct eth_header *eh = packet->l2;
-
-    memcpy(eh->eth_src, eth_key->eth_src, sizeof eh->eth_src);
-    memcpy(eh->eth_dst, eth_key->eth_dst, sizeof eh->eth_dst);
-}
-
-static void
 dp_netdev_output_port(struct dp_netdev *dp, struct ofpbuf *packet,
                       uint32_t out_port)
 {
@@ -1228,73 +1219,6 @@ dp_netdev_action_userspace(struct dp_netdev *dp,
     dp_netdev_output_userspace(dp, packet, DPIF_UC_ACTION, key, userdata);
 }
 
-static void
-execute_set_action(struct ofpbuf *packet, const struct nlattr *a,
-                   uint32_t *skb_mark)
-{
-    enum ovs_key_attr type = nl_attr_type(a);
-    const struct ovs_key_ipv4 *ipv4_key;
-    const struct ovs_key_ipv6 *ipv6_key;
-    const struct ovs_key_tcp *tcp_key;
-    const struct ovs_key_udp *udp_key;
-
-    switch (type) {
-    case OVS_KEY_ATTR_PRIORITY:
-    case OVS_KEY_ATTR_TUNNEL:
-        /* not implemented */
-        break;
-
-    case OVS_KEY_ATTR_SKB_MARK:
-        *skb_mark = nl_attr_get_u32(a);
-        break;
-
-    case OVS_KEY_ATTR_ETHERNET:
-        dp_netdev_set_dl(packet,
-                   nl_attr_get_unspec(a, sizeof(struct ovs_key_ethernet)));
-        break;
-
-    case OVS_KEY_ATTR_IPV4:
-        ipv4_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv4));
-        packet_set_ipv4(packet, ipv4_key->ipv4_src, ipv4_key->ipv4_dst,
-                        ipv4_key->ipv4_tos, ipv4_key->ipv4_ttl);
-        break;
-
-    case OVS_KEY_ATTR_IPV6:
-        ipv6_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv6));
-        packet_set_ipv6(packet, ipv6_key->ipv6_proto, ipv6_key->ipv6_src,
-                        ipv6_key->ipv6_dst, ipv6_key->ipv6_tclass,
-                        ipv6_key->ipv6_label, ipv6_key->ipv6_hlimit);
-        break;
-
-    case OVS_KEY_ATTR_TCP:
-        tcp_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_tcp));
-        packet_set_tcp_port(packet, tcp_key->tcp_src, tcp_key->tcp_dst);
-        break;
-
-     case OVS_KEY_ATTR_UDP:
-        udp_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_udp));
-        packet_set_udp_port(packet, udp_key->udp_src, udp_key->udp_dst);
-        break;
-
-     case OVS_KEY_ATTR_MPLS:
-         set_mpls_lse(packet, nl_attr_get_be32(a));
-         break;
-
-     case OVS_KEY_ATTR_UNSPEC:
-     case OVS_KEY_ATTR_ENCAP:
-     case OVS_KEY_ATTR_ETHERTYPE:
-     case OVS_KEY_ATTR_IN_PORT:
-     case OVS_KEY_ATTR_VLAN:
-     case OVS_KEY_ATTR_ICMP:
-     case OVS_KEY_ATTR_ICMPV6:
-     case OVS_KEY_ATTR_ARP:
-     case OVS_KEY_ATTR_ND:
-     case __OVS_KEY_ATTR_MAX:
-     default:
-        NOT_REACHED();
-    }
-}
-
 static bool
 dp_netdev_execute_actions(struct dp_netdev *dp,
                           struct ofpbuf *packet, struct flow *key,
diff --git a/lib/odp-util.c b/lib/odp-util.c
index e18e109..ad5873c 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -2420,3 +2420,79 @@ commit_odp_actions(const struct flow *flow, struct flow *base,
     commit_set_priority_action(flow, base, odp_actions);
     commit_set_skb_mark_action(flow, base, odp_actions);
 }
+
+static void
+dp_netdev_set_dl(struct ofpbuf *packet, const struct ovs_key_ethernet *eth_key)
+{
+    struct eth_header *eh = packet->l2;
+
+    memcpy(eh->eth_src, eth_key->eth_src, sizeof eh->eth_src);
+    memcpy(eh->eth_dst, eth_key->eth_dst, sizeof eh->eth_dst);
+}
+
+void
+execute_set_action(struct ofpbuf *packet, const struct nlattr *a,
+                   uint32_t *skb_mark)
+{
+    enum ovs_key_attr type = nl_attr_type(a);
+    const struct ovs_key_ipv4 *ipv4_key;
+    const struct ovs_key_ipv6 *ipv6_key;
+    const struct ovs_key_tcp *tcp_key;
+    const struct ovs_key_udp *udp_key;
+
+    switch (type) {
+    case OVS_KEY_ATTR_PRIORITY:
+    case OVS_KEY_ATTR_TUNNEL:
+        /* not implemented */
+        break;
+
+    case OVS_KEY_ATTR_SKB_MARK:
+        *skb_mark = nl_attr_get_u32(a);
+        break;
+
+    case OVS_KEY_ATTR_ETHERNET:
+        dp_netdev_set_dl(packet,
+                   nl_attr_get_unspec(a, sizeof(struct ovs_key_ethernet)));
+        break;
+
+    case OVS_KEY_ATTR_IPV4:
+        ipv4_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv4));
+        packet_set_ipv4(packet, ipv4_key->ipv4_src, ipv4_key->ipv4_dst,
+                        ipv4_key->ipv4_tos, ipv4_key->ipv4_ttl);
+        break;
+
+    case OVS_KEY_ATTR_IPV6:
+        ipv6_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv6));
+        packet_set_ipv6(packet, ipv6_key->ipv6_proto, ipv6_key->ipv6_src,
+                        ipv6_key->ipv6_dst, ipv6_key->ipv6_tclass,
+                        ipv6_key->ipv6_label, ipv6_key->ipv6_hlimit);
+        break;
+
+    case OVS_KEY_ATTR_TCP:
+        tcp_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_tcp));
+        packet_set_tcp_port(packet, tcp_key->tcp_src, tcp_key->tcp_dst);
+        break;
+
+     case OVS_KEY_ATTR_UDP:
+        udp_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_udp));
+        packet_set_udp_port(packet, udp_key->udp_src, udp_key->udp_dst);
+        break;
+
+     case OVS_KEY_ATTR_MPLS:
+         set_mpls_lse(packet, nl_attr_get_be32(a));
+         break;
+
+     case OVS_KEY_ATTR_UNSPEC:
+     case OVS_KEY_ATTR_ENCAP:
+     case OVS_KEY_ATTR_ETHERTYPE:
+     case OVS_KEY_ATTR_IN_PORT:
+     case OVS_KEY_ATTR_VLAN:
+     case OVS_KEY_ATTR_ICMP:
+     case OVS_KEY_ATTR_ICMPV6:
+     case OVS_KEY_ATTR_ARP:
+     case OVS_KEY_ATTR_ND:
+     case __OVS_KEY_ATTR_MAX:
+     default:
+        NOT_REACHED();
+    }
+}
diff --git a/lib/odp-util.h b/lib/odp-util.h
index da62aa5..637d6a5 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -159,6 +159,9 @@ void odp_put_tunnel_action(const struct flow_tnl *tunnel,
 void odp_put_skb_mark_action(const uint32_t skb_mark,
                              struct ofpbuf *odp_actions);
 
+void execute_set_action(struct ofpbuf *packet, const struct nlattr *a,
+                        uint32_t *skb_mark);
+
 /* Reasons why a subfacet might not be fast-pathable. */
 enum slow_path_reason {
     /* These reasons are mutually exclusive. */
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 4/4] Avoid recirculation id collision
From: Simon Horman @ 2013-04-08  6:43 UTC (permalink / raw)
  To: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: Isaku Yamahata, Ravi K
In-Reply-To: <1365403431-18102-1-git-send-email-horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>

Avoid recirculation id collision by checking that an id is
not already associated with a facet.

Consecutive recirculation ids are used and thus it is possible for
there to be situations where a very large number of ids have to
be checked before finding one that is not already associated with a facet.

To mitigate the performance impact of such situations, a limit on
the number of checks is in place. If no unused recirculation id
can be found, the miss is handled without facets, as this can
be done using a dummy recirculation id.

Signed-off-by: Simon Horman <horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>

This patch depends on the patch "Allow recirculation without facets"

---

v5
* First post
---
 ofproto/ofproto-dpif.c |   55 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 45 insertions(+), 10 deletions(-)

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 67121f2..e9ab58c 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3455,24 +3455,51 @@ port_is_lacp_current(const struct ofport *ofport_)
 #define RECIRCULATION_ID_DUMMY 2
 #define RECIRCULATION_ID_MIN   RECIRCULATION_ID_DUMMY
 
+#define RECIRCULATION_ID_MAX_LOOP 1024  /* Arbitrary value to prevent
+                                         * endless loop */
+
 static uint32_t recirculation_id_hash(uint32_t id)
 {
     return hash_words(&id, 1, 0);
 }
 
-/* XXX: This does not prevent id collision */
-static uint32_t get_recirculation_id(void)
+static uint32_t recirculation_id = RECIRCULATION_ID_MIN;
+static uint32_t validated_recirculation_id = RECIRCULATION_ID_NONE;
+
+static uint32_t peek_recirculation_id(struct ofproto_dpif *ofproto)
 {
-    static uint32_t id = RECIRCULATION_ID_MIN;
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
+
+    int loop = RECIRCULATION_ID_MAX_LOOP;
 
-    if (id < RECIRCULATION_ID_MIN)
-        id = RECIRCULATION_ID_MIN;
-    /* Skip IPSEC_MARK bit it is reserved */
-    if (id & IPSEC_MARK) {
-        id++;
-        ovs_assert(!(id & IPSEC_MARK));
+    if (validated_recirculation_id == recirculation_id) {
+        return recirculation_id;
+    }
+
+    while (loop--) {
+        if (recirculation_id < RECIRCULATION_ID_MIN)
+            recirculation_id = RECIRCULATION_ID_MIN;
+        /* Skip IPSEC_MARK bit it is reserved */
+        if (recirculation_id & IPSEC_MARK) {
+            recirculation_id++;
+            ovs_assert(!(recirculation_id & IPSEC_MARK));
+        }
+        if (!facet_find_by_id(ofproto, recirculation_id)) {
+            validated_recirculation_id = recirculation_id;
+            return recirculation_id;
+        }
+        recirculation_id++;
     }
-    return id++;
+
+    VLOG_WARN_RL(&rl, "Failed to allocate recirulation id after %d attempts\n",
+                 RECIRCULATION_ID_MAX_LOOP);
+    return RECIRCULATION_ID_NONE;
+}
+
+static uint32_t get_recirculation_id(void)
+{
+    ovs_assert(recirculation_id == validated_recirculation_id);
+    return recirculation_id++;
 }
 \f
 /* Upcall handling. */
@@ -3690,6 +3717,14 @@ static bool
 flow_miss_should_make_facet(struct ofproto_dpif *ofproto,
                             struct flow_miss *miss, uint32_t hash)
 {
+    /* If the packet is MPLS then recirculation may be used and
+     * this will not be possible with facets if there are no recirculation
+     * ids available */
+    if (eth_type_mpls(miss->flow.dl_type) &&
+        peek_recirculation_id(ofproto) == RECIRCULATION_ID_NONE) {
+        return false;
+    }
+
     if (!ofproto->governor) {
         size_t n_subfacets;
 
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 3/4] Allow recirculation without facets
From: Simon Horman @ 2013-04-08  6:43 UTC (permalink / raw)
  To: dev, netdev; +Cc: Ravi K, Isaku Yamahata, Jesse Gross, Ben Pfaff
In-Reply-To: <1365403431-18102-1-git-send-email-horms@verge.net.au>

This covers the following cases:

* Handle flow miss without facet
  - Previously the use of facets was forced if there was
    any chance of a recirculation action, that is, for
    all flow misses of MPLS packets.
* Packet Out

Signed-off-by: Simon Horman <horms@verge.net.au>

---

This patch depends on the patch "Move execute_set_action to lib/odp-util.c"

v5
* Use RECIRCULATION_ID_DUMMY

rfc4
* Initial post
---
 ofproto/ofproto-dpif.c |  132 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 111 insertions(+), 21 deletions(-)

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 5129da1..67121f2 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3571,6 +3571,64 @@ flow_miss_find(struct hmap *todo, const struct ofproto_dpif *ofproto,
     return NULL;
 }
 
+static void
+execute_actions_for_recircualtion(struct ofpbuf *packet,
+                                  const struct nlattr *actions,
+                                  size_t actions_len, uint32_t *skb_mark)
+{
+    const struct nlattr *a;
+    unsigned int left;
+
+    NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
+        int type = nl_attr_type(a);
+
+        switch ((enum ovs_action_attr) type) {
+
+        case OVS_ACTION_ATTR_PUSH_VLAN: {
+            const struct ovs_action_push_vlan *vlan = nl_attr_get(a);
+            eth_push_vlan(packet, vlan->vlan_tci);
+            break;
+        }
+
+        case OVS_ACTION_ATTR_POP_VLAN:
+            eth_pop_vlan(packet);
+            break;
+
+        case OVS_ACTION_ATTR_PUSH_MPLS: {
+            const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
+            push_mpls(packet, mpls->mpls_ethertype, mpls->mpls_lse);
+            break;
+         }
+
+        case OVS_ACTION_ATTR_POP_MPLS:
+            pop_mpls(packet, nl_attr_get_be16(a));
+            break;
+
+        case OVS_ACTION_ATTR_SET:
+            execute_set_action(packet, nl_attr_get(a), skb_mark);
+            break;
+
+        case OVS_ACTION_ATTR_RECIRCULATE:
+            if (packet->l2) {
+                ofpbuf_push_uninit(packet, (char *)packet->l2 -
+                                   (char *)packet->data);
+            }
+            return;
+
+        case OVS_ACTION_ATTR_OUTPUT:
+        case OVS_ACTION_ATTR_USERSPACE:
+        case OVS_ACTION_ATTR_SAMPLE:
+        case OVS_ACTION_ATTR_UNSPEC:
+        case __OVS_ACTION_ATTR_MAX:
+            NOT_REACHED();
+        }
+    }
+
+    /* There should always be a OVS_ACTION_ATTR_RECIRCULATE present
+     * in actions if this function is called */
+    NOT_REACHED();
+}
+
 /* Partially Initializes 'op' as an "execute" operation for 'miss' and
  * 'packet'.  The caller must initialize op->actions and op->actions_len.  If
  * 'miss' is associated with a subfacet the caller must also initialize the
@@ -3632,15 +3690,6 @@ static bool
 flow_miss_should_make_facet(struct ofproto_dpif *ofproto,
                             struct flow_miss *miss, uint32_t hash)
 {
-    /* A facet is currently required to handle recirculation.
-     * There currently isn't a good way to detect if recirculation will
-     * occur or not. So in the mean time assume that it can't occur
-     * for non-MPLS packets and it may occur for MPLS packets
-     */
-    if (eth_type_mpls(miss->flow.dl_type)) {
-        return true;
-    }
-
     if (!ofproto->governor) {
         size_t n_subfacets;
 
@@ -3656,6 +3705,50 @@ flow_miss_should_make_facet(struct ofproto_dpif *ofproto,
                                         list_size(&miss->packets));
 }
 
+static const struct flow *
+xlate_with_recirculate(struct ofproto_dpif *ofproto, struct rule_dpif *rule,
+                       const struct flow *flow, struct flow *flow_storage,
+                       const struct initial_vals *initial_vals,
+                       const struct ofpact *ofpacts, size_t ofpacts_len,
+                       struct ofpbuf *odp_actions,
+                       struct dpif_flow_stats *stats, struct ofpbuf *packet)
+{
+    struct initial_vals initial_vals_ = *initial_vals;
+
+    while (1) {
+        struct action_xlate_ctx ctx;
+        uint32_t skb_mark = flow->skb_mark;
+
+        ofpbuf_clear(odp_actions);
+        action_xlate_ctx_init(&ctx, ofproto, flow, &initial_vals_,
+                              rule, stats->tcp_flags, packet,
+                              RECIRCULATION_ID_DUMMY);
+        ctx.resubmit_stats = stats;
+        xlate_actions(&ctx, ofpacts, ofpacts_len, odp_actions);
+
+        if (!ctx.recircualted) {
+            break;
+        }
+
+        /* Update the packet */
+        execute_actions_for_recircualtion(packet, odp_actions->data,
+                                          odp_actions->size, &skb_mark);
+        ofpbuf_clear(odp_actions);
+
+        /* Replace the flow */
+        flow_extract(packet, flow->skb_priority, skb_mark,
+                     NULL, flow->in_port, flow_storage);
+        flow = flow_storage;
+        initial_vals_.vlan_tci = flow->vlan_tci;
+        initial_vals_.tunnel_ip_tos = flow->tunnel.ip_tos;
+
+        ofpacts = ofpact_end(ofpacts, ctx.ofpacts_len);
+        ofpacts_len -= ctx.ofpacts_len;
+    }
+
+    return flow;
+}
+
 /* Handles 'miss', which matches 'rule', without creating a facet or subfacet
  * or creating any datapath flow.  May add an "execute" operation to 'ops' and
  * increment '*n_ops'. */
@@ -3666,8 +3759,8 @@ handle_flow_miss_without_facet(struct flow_miss *miss, struct rule_dpif *rule,
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
     long long int now = time_msec();
-    struct action_xlate_ctx ctx;
     struct ofpbuf *packet;
+    struct flow flow_storage;
 
     LIST_FOR_EACH (packet, list_node, &miss->packets) {
         struct flow_miss_op *op = &ops[*n_ops];
@@ -3681,11 +3774,9 @@ handle_flow_miss_without_facet(struct flow_miss *miss, struct rule_dpif *rule,
         dpif_flow_stats_extract(&miss->flow, packet, now, &stats);
         rule_credit_stats(rule, &stats);
 
-        action_xlate_ctx_init(&ctx, ofproto, &miss->flow,
-                              &miss->initial_vals, rule, 0, packet,
-                              RECIRCULATION_ID_DUMMY);
-        ctx.resubmit_stats = &stats;
-        xlate_actions(&ctx, ofpacts, ofpacts_len, &odp_actions);
+        xlate_with_recirculate(ofproto, rule, &miss->flow, &flow_storage,
+                               &miss->initial_vals, ofpacts, ofpacts_len,
+                               &odp_actions, &stats, packet);
 
         if (odp_actions.size) {
             struct dpif_execute *execute = &op->dpif_op.u.execute;
@@ -7937,10 +8028,10 @@ packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
     struct initial_vals initial_vals;
     struct odputil_keybuf keybuf;
     struct dpif_flow_stats stats;
+    struct flow flow_storage;
 
     struct ofpbuf key;
 
-    struct action_xlate_ctx ctx;
     uint64_t odp_actions_stub[1024 / 8];
     struct ofpbuf odp_actions;
 
@@ -7952,14 +8043,13 @@ packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
 
     initial_vals.vlan_tci = flow->vlan_tci;
     initial_vals.tunnel_ip_tos = 0;
-    action_xlate_ctx_init(&ctx, ofproto, flow, &initial_vals, NULL,
-                          packet_get_tcp_flags(packet, flow), packet,
-                          RECIRCULATION_ID_DUMMY);
-    ctx.resubmit_stats = &stats;
 
     ofpbuf_use_stub(&odp_actions,
                     odp_actions_stub, sizeof odp_actions_stub);
-    xlate_actions(&ctx, ofpacts, ofpacts_len, &odp_actions);
+    flow = xlate_with_recirculate(ofproto, NULL, flow, &flow_storage,
+                                  &initial_vals, ofpacts, ofpacts_len,
+                                  &odp_actions, &stats, packet);
+
     dpif_execute(ofproto->backer->dpif, key.data, key.size,
                  odp_actions.data, odp_actions.size, packet);
     ofpbuf_uninit(&odp_actions);
-- 
1.7.10.4

^ permalink raw reply related

* Re: be2net: GRO for non-inet protocols
From: Erik Hugne @ 2013-04-08  6:40 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: sathya.perla, subbu.seetharaman, ajit.khaparde, netdev
In-Reply-To: <1365175872.3405.3.camel@edumazet-glaptop>

On Fri, Apr 05, 2013 at 08:31:12AM -0700, Eric Dumazet wrote:
> On Fri, 2013-04-05 at 08:28 -0700, Eric Dumazet wrote:
> 
> >  /* Process the RX completion indicated by rxcp when GRO is disabled */
> > -static void be_rx_compl_process(struct be_rx_obj *rxo,
> > +static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
> >  				struct be_rx_compl_info *rxcp)
> >  {
> >  	struct be_adapter *adapter = rxo->adapter;
> > @@ -1385,7 +1385,7 @@ static void be_rx_compl_process(struct be_rx_obj *rxo,
> >  	if (rxcp->vlanf)
> >  		__vlan_hwaccel_put_tag(skb, rxcp->vlan_tag);
> >  
> > -	netif_receive_skb(skb);
> > +	napi_gro_receive(&napi, skb);
> 
> That would be : napi_gro_receive(napi, skb);
> 

Thanks Eric, it works as expected after applying this.

//E

^ permalink raw reply

* how to test multipe unicast MAC address
From: Rayagond K @ 2013-04-08  7:08 UTC (permalink / raw)
  To: netdev

Hi All,

How can I assign multiple unicast MAC addresses to a single interface?
How can I test multiple unicast MAC address filtering?

Thanks in advance.

wwr
Rayagond.

^ permalink raw reply

* Ignoring destination MAC address in incoming packets
From: Krzysztof Halasa @ 2013-04-08  7:08 UTC (permalink / raw)
  To: netdev

Hi,

I have a bit unusual problem today. Is there a way to ignore destination
MAC address in incoming packets?

I need to do this on Atheros Wi-Fi. Basically what I need to achieve is
a sniffer.

Something like this:
- ATH5k working in ad-hoc mode, certain ESSID and channel selected.
- UDP packets in certain range are to be forwarded to other IPs.
- the wifi interface must be able to normally communicate (within its
  ESSID and channel).
- it has to work with iptables etc. (DNAT, routing).

I know I can change a line in the kernel (the one setting pkt_type to
OTHER_HOST) but perhaps there is some standard way for things like this?

Or should I use AF_PACKET and forward the data with userspace helper?

Any other idea?

TIA.
-- 
Krzysztof Halasa

^ permalink raw reply

* Re: Ignoring destination MAC address in incoming packets
From: Antonio Quartulli @ 2013-04-08  7:19 UTC (permalink / raw)
  To: Krzysztof Halasa; +Cc: netdev
In-Reply-To: <m3txnhmrdm.fsf@intrepid.localdomain>

[-- Attachment #1: Type: text/plain, Size: 1029 bytes --]

On Mon, Apr 08, 2013 at 09:08:21AM +0200, Krzysztof Halasa wrote:
> Hi,
> 
> I have a bit unusual problem today. Is there a way to ignore destination
> MAC address in incoming packets?
> 
> I need to do this on Atheros Wi-Fi. Basically what I need to achieve is
> a sniffer.
> 
> Something like this:
> - ATH5k working in ad-hoc mode, certain ESSID and channel selected.
> - UDP packets in certain range are to be forwarded to other IPs.
> - the wifi interface must be able to normally communicate (within its
>   ESSID and channel).
> - it has to work with iptables etc. (DNAT, routing).
> 
> I know I can change a line in the kernel (the one setting pkt_type to
> OTHER_HOST) but perhaps there is some standard way for things like this?
> 
> Or should I use AF_PACKET and forward the data with userspace helper?
> 
> Any other idea?

Maybe a stupid suggestion, but what about turning promiscuous mode on?

Cheers,

-- 
Antonio Quartulli

..each of us alone is worth nothing..
Ernesto "Che" Guevara

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* [PATCH 1/2] crypto: add CMAC support to CryptoAPI
From: Jussi Kivilinna @ 2013-04-08  7:48 UTC (permalink / raw)
  To: linux-crypto
  Cc: Steffen Klassert, netdev, Tom St Denis, Herbert Xu,
	David S. Miller

This patch adds support for the NIST-recommended block cipher mode CMAC to the CryptoAPI.

This work is based on Tom St Denis' earlier patch,
 http://marc.info/?l=linux-crypto-vger&m=135877306305466&w=2

Cc: Tom St Denis <tstdenis@elliptictech.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
 crypto/Kconfig   |   11 ++
 crypto/Makefile  |    1 
 crypto/cmac.c    |  315 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 crypto/tcrypt.c  |   11 ++
 crypto/testmgr.c |   18 +++
 crypto/testmgr.h |  125 +++++++++++++++++++++
 6 files changed, 480 insertions(+), 1 deletion(-)
 create mode 100644 crypto/cmac.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 6cc27f1..c1142f3 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -283,6 +283,17 @@ config CRYPTO_XTS
 
 comment "Hash modes"
 
+config CRYPTO_CMAC
+	tristate "CMAC support"
+	select CRYPTO_HASH
+	select CRYPTO_MANAGER
+	help
+	  Cipher-based Message Authentication Code (CMAC) specified by
+	  The National Institute of Standards and Technology (NIST).
+
+	  https://tools.ietf.org/html/rfc4493
+	  http://csrc.nist.gov/publications/nistpubs/800-38B/SP_800-38B.pdf
+
 config CRYPTO_HMAC
 	tristate "HMAC support"
 	select CRYPTO_HASH
diff --git a/crypto/Makefile b/crypto/Makefile
index be1a1be..a8e9b0f 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -32,6 +32,7 @@ cryptomgr-y := algboss.o testmgr.o
 
 obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
 obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
+obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
 obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
 obj-$(CONFIG_CRYPTO_VMAC) += vmac.o
 obj-$(CONFIG_CRYPTO_XCBC) += xcbc.o
diff --git a/crypto/cmac.c b/crypto/cmac.c
new file mode 100644
index 0000000..50880cf
--- /dev/null
+++ b/crypto/cmac.c
@@ -0,0 +1,315 @@
+/*
+ * CMAC: Cipher Block Mode for Authentication
+ *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on work by:
+ *  Copyright © 2013 Tom St Denis <tstdenis@elliptictech.com>
+ * Based on crypto/xcbc.c:
+ *  Copyright © 2006 USAGI/WIDE Project,
+ *   Author: Kazunori Miyazawa <miyazawa@linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/*
+ * +------------------------
+ * | <parent tfm>
+ * +------------------------
+ * | cmac_tfm_ctx
+ * +------------------------
+ * | consts (block size * 2)
+ * +------------------------
+ */
+struct cmac_tfm_ctx {
+	struct crypto_cipher *child;
+	u8 ctx[];
+};
+
+/*
+ * +------------------------
+ * | <shash desc>
+ * +------------------------
+ * | cmac_desc_ctx
+ * +------------------------
+ * | odds (block size)
+ * +------------------------
+ * | prev (block size)
+ * +------------------------
+ */
+struct cmac_desc_ctx {
+	unsigned int len;
+	u8 ctx[];
+};
+
+static int crypto_cmac_digest_setkey(struct crypto_shash *parent,
+				     const u8 *inkey, unsigned int keylen)
+{
+	unsigned long alignmask = crypto_shash_alignmask(parent);
+	struct cmac_tfm_ctx *ctx = crypto_shash_ctx(parent);
+	unsigned int bs = crypto_shash_blocksize(parent);
+	__be64 *consts = PTR_ALIGN((void *)ctx->ctx, alignmask + 1);
+	u64 _const[2];
+	int i, err = 0;
+	u8 msb_mask, gfmask;
+
+	err = crypto_cipher_setkey(ctx->child, inkey, keylen);
+	if (err)
+		return err;
+
+	/* encrypt the zero block */
+	memset(consts, 0, bs);
+	crypto_cipher_encrypt_one(ctx->child, (u8 *)consts, (u8 *)consts);
+
+	switch (bs) {
+	case 16:
+		gfmask = 0x87;
+		_const[0] = be64_to_cpu(consts[1]);
+		_const[1] = be64_to_cpu(consts[0]);
+
+		/* gf(2^128) multiply zero-ciphertext with u and u^2 */
+		for (i = 0; i < 4; i += 2) {
+			msb_mask = ((s64)_const[1] >> 63) & gfmask;
+			_const[1] = (_const[1] << 1) | (_const[0] >> 63);
+			_const[0] = (_const[0] << 1) ^ msb_mask;
+
+			consts[i + 0] = cpu_to_be64(_const[1]);
+			consts[i + 1] = cpu_to_be64(_const[0]);
+		}
+
+		break;
+	case 8:
+		gfmask = 0x1B;
+		_const[0] = be64_to_cpu(consts[0]);
+
+		/* gf(2^64) multiply zero-ciphertext with u and u^2 */
+		for (i = 0; i < 2; i++) {
+			msb_mask = ((s64)_const[0] >> 63) & gfmask;
+			_const[0] = (_const[0] << 1) ^ msb_mask;
+
+			consts[i] = cpu_to_be64(_const[0]);
+		}
+
+		break;
+	}
+
+	return 0;
+}
+
+static int crypto_cmac_digest_init(struct shash_desc *pdesc)
+{
+	unsigned long alignmask = crypto_shash_alignmask(pdesc->tfm);
+	struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
+	int bs = crypto_shash_blocksize(pdesc->tfm);
+	u8 *prev = PTR_ALIGN((void *)ctx->ctx, alignmask + 1) + bs;
+
+	ctx->len = 0;
+	memset(prev, 0, bs);
+
+	return 0;
+}
+
+static int crypto_cmac_digest_update(struct shash_desc *pdesc, const u8 *p,
+				     unsigned int len)
+{
+	struct crypto_shash *parent = pdesc->tfm;
+	unsigned long alignmask = crypto_shash_alignmask(parent);
+	struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
+	struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
+	struct crypto_cipher *tfm = tctx->child;
+	int bs = crypto_shash_blocksize(parent);
+	u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1);
+	u8 *prev = odds + bs;
+
+	/* checking the data can fill the block */
+	if ((ctx->len + len) <= bs) {
+		memcpy(odds + ctx->len, p, len);
+		ctx->len += len;
+		return 0;
+	}
+
+	/* filling odds with new data and encrypting it */
+	memcpy(odds + ctx->len, p, bs - ctx->len);
+	len -= bs - ctx->len;
+	p += bs - ctx->len;
+
+	crypto_xor(prev, odds, bs);
+	crypto_cipher_encrypt_one(tfm, prev, prev);
+
+	/* clearing the length */
+	ctx->len = 0;
+
+	/* encrypting the rest of data */
+	while (len > bs) {
+		crypto_xor(prev, p, bs);
+		crypto_cipher_encrypt_one(tfm, prev, prev);
+		p += bs;
+		len -= bs;
+	}
+
+	/* keeping the surplus of blocksize */
+	if (len) {
+		memcpy(odds, p, len);
+		ctx->len = len;
+	}
+
+	return 0;
+}
+
+static int crypto_cmac_digest_final(struct shash_desc *pdesc, u8 *out)
+{
+	struct crypto_shash *parent = pdesc->tfm;
+	unsigned long alignmask = crypto_shash_alignmask(parent);
+	struct cmac_tfm_ctx *tctx = crypto_shash_ctx(parent);
+	struct cmac_desc_ctx *ctx = shash_desc_ctx(pdesc);
+	struct crypto_cipher *tfm = tctx->child;
+	int bs = crypto_shash_blocksize(parent);
+	u8 *consts = PTR_ALIGN((void *)tctx->ctx, alignmask + 1);
+	u8 *odds = PTR_ALIGN((void *)ctx->ctx, alignmask + 1);
+	u8 *prev = odds + bs;
+	unsigned int offset = 0;
+
+	if (ctx->len != bs) {
+		unsigned int rlen;
+		u8 *p = odds + ctx->len;
+
+		*p = 0x80;
+		p++;
+
+		rlen = bs - ctx->len - 1;
+		if (rlen)
+			memset(p, 0, rlen);
+
+		offset += bs;
+	}
+
+	crypto_xor(prev, odds, bs);
+	crypto_xor(prev, consts + offset, bs);
+
+	crypto_cipher_encrypt_one(tfm, out, prev);
+
+	return 0;
+}
+
+static int cmac_init_tfm(struct crypto_tfm *tfm)
+{
+	struct crypto_cipher *cipher;
+	struct crypto_instance *inst = (void *)tfm->__crt_alg;
+	struct crypto_spawn *spawn = crypto_instance_ctx(inst);
+	struct cmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->child = cipher;
+
+	return 0;
+};
+
+static void cmac_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct cmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
+	crypto_free_cipher(ctx->child);
+}
+
+static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+	struct shash_instance *inst;
+	struct crypto_alg *alg;
+	unsigned long alignmask;
+	int err;
+
+	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH);
+	if (err)
+		return err;
+
+	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
+				  CRYPTO_ALG_TYPE_MASK);
+	if (IS_ERR(alg))
+		return PTR_ERR(alg);
+
+	switch (alg->cra_blocksize) {
+	case 16:
+	case 8:
+		break;
+	default:
+		goto out_put_alg;
+	}
+
+	inst = shash_alloc_instance("cmac", alg);
+	err = PTR_ERR(inst);
+	if (IS_ERR(inst))
+		goto out_put_alg;
+
+	err = crypto_init_spawn(shash_instance_ctx(inst), alg,
+				shash_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	if (err)
+		goto out_free_inst;
+
+	alignmask = alg->cra_alignmask | (sizeof(long) - 1);
+	inst->alg.base.cra_alignmask = alignmask;
+	inst->alg.base.cra_priority = alg->cra_priority;
+	inst->alg.base.cra_blocksize = alg->cra_blocksize;
+
+	inst->alg.digestsize = alg->cra_blocksize;
+	inst->alg.descsize =
+		ALIGN(sizeof(struct cmac_desc_ctx), crypto_tfm_ctx_alignment())
+		+ (alignmask & ~(crypto_tfm_ctx_alignment() - 1))
+		+ alg->cra_blocksize * 2;
+
+	inst->alg.base.cra_ctxsize =
+		ALIGN(sizeof(struct cmac_tfm_ctx), alignmask + 1)
+		+ alg->cra_blocksize * 2;
+
+	inst->alg.base.cra_init = cmac_init_tfm;
+	inst->alg.base.cra_exit = cmac_exit_tfm;
+
+	inst->alg.init = crypto_cmac_digest_init;
+	inst->alg.update = crypto_cmac_digest_update;
+	inst->alg.final = crypto_cmac_digest_final;
+	inst->alg.setkey = crypto_cmac_digest_setkey;
+
+	err = shash_register_instance(tmpl, inst);
+	if (err) {
+out_free_inst:
+		shash_free_instance(shash_crypto_instance(inst));
+	}
+
+out_put_alg:
+	crypto_mod_put(alg);
+	return err;
+}
+
+static struct crypto_template crypto_cmac_tmpl = {
+	.name = "cmac",
+	.create = cmac_create,
+	.free = shash_free_instance,
+	.module = THIS_MODULE,
+};
+
+static int __init crypto_cmac_module_init(void)
+{
+	return crypto_register_template(&crypto_cmac_tmpl);
+}
+
+static void __exit crypto_cmac_module_exit(void)
+{
+	crypto_unregister_template(&crypto_cmac_tmpl);
+}
+
+module_init(crypto_cmac_module_init);
+module_exit(crypto_cmac_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CMAC keyed hash algorithm");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 6b911ef..24ea7df 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1095,7 +1095,6 @@ static int do_test(int m)
 		break;
 
 	case 28:
-
 		ret += tcrypt_test("tgr160");
 		break;
 
@@ -1118,6 +1117,7 @@ static int do_test(int m)
 		ret += tcrypt_test("lrw(camellia)");
 		ret += tcrypt_test("xts(camellia)");
 		break;
+
 	case 33:
 		ret += tcrypt_test("sha224");
 		break;
@@ -1213,6 +1213,7 @@ static int do_test(int m)
 	case 109:
 		ret += tcrypt_test("vmac(aes)");
 		break;
+
 	case 110:
 		ret += tcrypt_test("hmac(crc32)");
 		break;
@@ -1229,6 +1230,14 @@ static int do_test(int m)
 		ret += tcrypt_test("rfc4543(gcm(aes))");
 		break;
 
+	case 153:
+		ret += tcrypt_test("cmac(aes)");
+		break;
+
+	case 154:
+		ret += tcrypt_test("cmac(des3_ede)");
+		break;
+
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
 				speed_template_16_24_32);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index f37e544..3807084 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1913,6 +1913,24 @@ static const struct alg_test_desc alg_test_descs[] = {
 			}
 		}
 	}, {
+		.alg = "cmac(aes)",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = {
+				.vecs = aes_cmac128_tv_template,
+				.count = CMAC_AES_TEST_VECTORS
+			}
+		}
+	}, {
+		.alg = "cmac(des3_ede)",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = {
+				.vecs = des3_ede_cmac64_tv_template,
+				.count = CMAC_DES3_EDE_TEST_VECTORS
+			}
+		}
+	}, {
 		.alg = "compress_null",
 		.test = alg_test_null,
 	}, {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 92db37d..d503660 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -1639,6 +1639,131 @@ static struct hash_testvec hmac_sha256_tv_template[] = {
 	},
 };
 
+#define CMAC_AES_TEST_VECTORS 6
+
+static struct hash_testvec aes_cmac128_tv_template[] = {
+	{ /* From NIST Special Publication 800-38B, AES-128 */
+		.key		= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+				  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+		.plaintext	= zeroed_string,
+		.digest		= "\xbb\x1d\x69\x29\xe9\x59\x37\x28"
+				  "\x7f\xa3\x7d\x12\x9b\x75\x67\x46",
+		.psize		= 0,
+		.ksize		= 16,
+	}, {
+		.key		= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+				  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+				  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a",
+		.digest		= "\x07\x0a\x16\xb4\x6b\x4d\x41\x44"
+				  "\xf7\x9b\xdd\x9d\xd0\x4a\x28\x7c",
+		.psize		= 16,
+		.ksize		= 16,
+	}, {
+		.key		= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+				  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+				  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+				  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+				  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+				  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11",
+		.digest		= "\xdf\xa6\x67\x47\xde\x9a\xe6\x30"
+				  "\x30\xca\x32\x61\x14\x97\xc8\x27",
+		.psize		= 40,
+		.ksize		= 16,
+	}, {
+		.key		= "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+				  "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+				  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+				  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+				  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+				  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+				  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+				  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+				  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.digest		= "\x51\xf0\xbe\xbf\x7e\x3b\x9d\x92"
+				  "\xfc\x49\x74\x17\x79\x36\x3c\xfe",
+		.psize		= 64,
+		.ksize		= 16,
+	}, { /* From NIST Special Publication 800-38B, AES-256 */
+		.key		= "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+				  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+				  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+				  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+		.plaintext	= zeroed_string,
+		.digest		= "\x02\x89\x62\xf6\x1b\x7b\xf8\x9e"
+				  "\xfc\x6b\x55\x1f\x46\x67\xd9\x83",
+		.psize		= 0,
+		.ksize		= 32,
+	}, {
+		.key		= "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+				  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+				  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+				  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+				  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+				  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+				  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+				  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+				  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+				  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+				  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.digest		= "\xe1\x99\x21\x90\x54\x9f\x6e\xd5"
+				  "\x69\x6a\x2c\x05\x6c\x31\x54\x10",
+		.psize		= 64,
+		.ksize		= 32,
+	}
+};
+
+#define CMAC_DES3_EDE_TEST_VECTORS 4
+
+static struct hash_testvec des3_ede_cmac64_tv_template[] = {
+/*
+ * From NIST Special Publication 800-38B, Three Key TDEA
+ * Corrected test vectors from:
+ *  http://csrc.nist.gov/publications/nistpubs/800-38B/Updated_CMAC_Examples.pdf
+ */
+	{
+		.key		= "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62"
+				  "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+				  "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+		.plaintext	= zeroed_string,
+		.digest		= "\xb7\xa6\x88\xe1\x22\xff\xaf\x95",
+		.psize		= 0,
+		.ksize		= 24,
+	}, {
+		.key		= "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62"
+				  "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+				  "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96",
+		.digest		= "\x8e\x8f\x29\x31\x36\x28\x37\x97",
+		.psize		= 8,
+		.ksize		= 24,
+	}, {
+		.key		= "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62"
+				  "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+				  "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+				  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+				  "\xae\x2d\x8a\x57",
+		.digest		= "\x74\x3d\xdb\xe0\xce\x2d\xc2\xed",
+		.psize		= 20,
+		.ksize		= 24,
+	}, {
+		.key		= "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62"
+				  "\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+				  "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+		.plaintext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+				  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+				  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+				  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51",
+		.digest		= "\x33\xe6\xb1\x09\x24\x00\xea\xe5",
+		.psize		= 32,
+		.ksize		= 24,
+	}
+};
+
 #define XCBC_AES_TEST_VECTORS 6
 
 static struct hash_testvec aes_xcbc128_tv_template[] = {

^ permalink raw reply related

* [PATCH 2/2] xfrm: add rfc4494 AES-CMAC-96 support
From: Jussi Kivilinna @ 2013-04-08  7:48 UTC (permalink / raw)
  To: linux-crypto
  Cc: Steffen Klassert, netdev, Tom St Denis, Herbert Xu,
	David S. Miller
In-Reply-To: <20130408074844.6866.90093.stgit@localhost6.localdomain6>

Now that CryptoAPI has support for CMAC, we can add support for AES-CMAC-96
(rfc4494).

Cc: Tom St Denis <tstdenis@elliptictech.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
 net/xfrm/xfrm_algo.c |   13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 6fb9d00..ab4ef72 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -311,6 +311,19 @@ static struct xfrm_algo_desc aalg_list[] = {
 		.sadb_alg_maxbits = 128
 	}
 },
+{
+	/* rfc4494 */
+	.name = "cmac(aes)",
+
+	.uinfo = {
+		.auth = {
+			.icv_truncbits = 96,
+			.icv_fullbits = 128,
+		}
+	},
+
+	.pfkey_supported = 0,
+},
 };
 
 static struct xfrm_algo_desc ealg_list[] = {

^ permalink raw reply related

* Re: Ignoring destination MAC address in incoming packets
From: Krzysztof Halasa @ 2013-04-08  7:49 UTC (permalink / raw)
  To: Antonio Quartulli; +Cc: netdev
In-Reply-To: <20130408071903.GB5938@ritirata.org>

Antonio Quartulli <ordex@autistici.org> writes:

> May be a stupid suggestion, but what about setting promisc mode on?

I use promiscuous mode, but the packets are still dropped because
skb->pkt_type = PACKET_OTHERHOST (since the destination MAC address
doesn't match and is not broadcast/multicast).

I realize accepting all packets blindly opens a can of worms and this
must be well thought out. Perhaps a user space tool using AF_PACKET is a
better way?
-- 
Krzysztof Halasa

^ permalink raw reply

* [patch 2/2] qeth: fix qeth_wait_for_threads() deadlock for OSN devices
From: frank.blaschka @ 2013-04-08  8:19 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-s390, Stefan Raspl
In-Reply-To: <20130408081925.995757091@de.ibm.com>

[-- Attachment #1: 602-qeth-thread-deadlock.diff --]
[-- Type: text/plain, Size: 3942 bytes --]

From: Stefan Raspl <raspl@linux.vnet.ibm.com>

Any recovery thread will deadlock when calling qeth_wait_for_threads(), most
notably when triggering a recovery on an OSN device.
This patch stores the recovery thread's task pointer when recovery is
invoked and makes qeth_wait_for_threads() return immediately when it is
called from that task, which avoids the deadlock.

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Frank Blaschka <blaschka@linux.vnet.ibm.com>
Reviewed-by: Ursula Braun <ursula.braun@de.ibm.com>
---
 drivers/s390/net/qeth_core.h      |    3 +++
 drivers/s390/net/qeth_core_main.c |   19 +++++++++++++++++++
 drivers/s390/net/qeth_l2_main.c   |    2 ++
 drivers/s390/net/qeth_l3_main.c   |    2 ++
 4 files changed, 26 insertions(+)

--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -769,6 +769,7 @@ struct qeth_card {
 	unsigned long thread_start_mask;
 	unsigned long thread_allowed_mask;
 	unsigned long thread_running_mask;
+	struct task_struct *recovery_task;
 	spinlock_t ip_lock;
 	struct list_head ip_list;
 	struct list_head *ip_tbd_list;
@@ -862,6 +863,8 @@ extern struct qeth_card_list_struct qeth
 extern struct kmem_cache *qeth_core_header_cache;
 extern struct qeth_dbf_info qeth_dbf[QETH_DBF_INFOS];
 
+void qeth_set_recovery_task(struct qeth_card *);
+void qeth_clear_recovery_task(struct qeth_card *);
 void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int);
 int qeth_threads_running(struct qeth_card *, unsigned long);
 int qeth_wait_for_threads(struct qeth_card *, unsigned long);
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -177,6 +177,23 @@ const char *qeth_get_cardname_short(stru
 	return "n/a";
 }
 
+void qeth_set_recovery_task(struct qeth_card *card)
+{
+	card->recovery_task = current;
+}
+EXPORT_SYMBOL_GPL(qeth_set_recovery_task);
+
+void qeth_clear_recovery_task(struct qeth_card *card)
+{
+	card->recovery_task = NULL;
+}
+EXPORT_SYMBOL_GPL(qeth_clear_recovery_task);
+
+static bool qeth_is_recovery_task(const struct qeth_card *card)
+{
+	return card->recovery_task == current;
+}
+
 void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads,
 			 int clear_start_mask)
 {
@@ -205,6 +222,8 @@ EXPORT_SYMBOL_GPL(qeth_threads_running);
 
 int qeth_wait_for_threads(struct qeth_card *card, unsigned long threads)
 {
+	if (qeth_is_recovery_task(card))
+		return 0;
 	return wait_event_interruptible(card->wait_q,
 			qeth_threads_running(card, threads) == 0);
 }
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1143,6 +1143,7 @@ static int qeth_l2_recover(void *ptr)
 	QETH_CARD_TEXT(card, 2, "recover2");
 	dev_warn(&card->gdev->dev,
 		"A recovery process has been started for the device\n");
+	qeth_set_recovery_task(card);
 	__qeth_l2_set_offline(card->gdev, 1);
 	rc = __qeth_l2_set_online(card->gdev, 1);
 	if (!rc)
@@ -1153,6 +1154,7 @@ static int qeth_l2_recover(void *ptr)
 		dev_warn(&card->gdev->dev, "The qeth device driver "
 				"failed to recover an error on the device\n");
 	}
+	qeth_clear_recovery_task(card);
 	qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD);
 	qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD);
 	return 0;
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3515,6 +3515,7 @@ static int qeth_l3_recover(void *ptr)
 	QETH_CARD_TEXT(card, 2, "recover2");
 	dev_warn(&card->gdev->dev,
 		"A recovery process has been started for the device\n");
+	qeth_set_recovery_task(card);
 	__qeth_l3_set_offline(card->gdev, 1);
 	rc = __qeth_l3_set_online(card->gdev, 1);
 	if (!rc)
@@ -3525,6 +3526,7 @@ static int qeth_l3_recover(void *ptr)
 		dev_warn(&card->gdev->dev, "The qeth device driver "
 				"failed to recover an error on the device\n");
 	}
+	qeth_clear_recovery_task(card);
 	qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD);
 	qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD);
 	return 0;

^ permalink raw reply

* [patch 0/2] s390: network bug fixes for net [v2]
From: frank.blaschka @ 2013-04-08  8:19 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-s390

Hi Dave,

here are the fixes for net again, including
feedback from Eric (Thx!)

shortlog:

Ursula Braun (1)
af_iucv: fix recvmsg by replacing skb_pull() function

Stefan Raspl (1)
qeth: fix qeth_wait_for_threads() deadlock for OSN devices

Thanks,
        Frank

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox