netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks
@ 2012-06-05 15:40 Phil Dibowitz
  2012-06-07 21:53 ` David Miller
  2012-06-21  4:04 ` David Miller
  0 siblings, 2 replies; 6+ messages in thread
From: Phil Dibowitz @ 2012-06-05 15:40 UTC (permalink / raw)
  To: davem; +Cc: netdev, phild

Ville Nuorvala wrote this patch to solve a problem we were having at Facebook.
He hasn't had time to send it upstream yet, but gave me permission to do it,
and I want to make sure it makes it upstream by World v6 Launch Day.

>From b413062771afbba064ae9bc49b5daed7abb1243d Mon Sep 17 00:00:00 2001
From: Ville Nuorvala <ville.nuorvala@gmail.com>
Subject: [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks

The same IPv6 address checks are performed as with any normal tunnel,
but as the fallback tunnel endpoint addresses are unspecified, the checks
must be performed on a per-packet basis, rather than at tunnel
configuration time.

Signed-off-by: Ville Nuorvala <ville.nuorvala@gmail.com>
Tested-by: Phil Dibowitz <phil@ipom.com>

---
 include/net/ip6_tunnel.h |    2 +
 net/ipv6/ip6_tunnel.c    |   65 +++++++++++++++++++++++++++-------------------
 2 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index fc73e66..358fb86 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -9,6 +9,8 @@
 #define IP6_TNL_F_CAP_XMIT 0x10000
 /* capable of receiving packets */
 #define IP6_TNL_F_CAP_RCV 0x20000
+/* determine capability on a per-packet basis */
+#define IP6_TNL_F_CAP_PER_PACKET 0x40000
 
 /* IPv6 tunnel */
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index c9015fa..04a3cba 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -684,24 +684,50 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
 		IP6_ECN_set_ce(ipv6_hdr(skb));
 }
 
+static __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
+			     const struct in6_addr *laddr,
+			     const struct in6_addr *raddr)
+{
+	struct ip6_tnl_parm *p = &t->parms;
+	int ltype = ipv6_addr_type(laddr);
+	int rtype = ipv6_addr_type(raddr);
+	__u32 flags = 0;
+
+	if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
+		flags = IP6_TNL_F_CAP_PER_PACKET;
+	} else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+		   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
+		   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
+		   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
+		if (ltype&IPV6_ADDR_UNICAST)
+			flags |= IP6_TNL_F_CAP_XMIT;
+		if (rtype&IPV6_ADDR_UNICAST)
+			flags |= IP6_TNL_F_CAP_RCV;
+	}
+	return flags;
+}
+
 /* called with rcu_read_lock() */
-static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
+static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
+				  const struct in6_addr *laddr,
+				  const struct in6_addr *raddr)
 {
 	struct ip6_tnl_parm *p = &t->parms;
 	int ret = 0;
 	struct net *net = dev_net(t->dev);
 
-	if (p->flags & IP6_TNL_F_CAP_RCV) {
+	if ((p->flags & IP6_TNL_F_CAP_RCV) ||
+	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
+	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
 		struct net_device *ldev = NULL;
 
 		if (p->link)
 			ldev = dev_get_by_index_rcu(net, p->link);
 
-		if ((ipv6_addr_is_multicast(&p->laddr) ||
-		     likely(ipv6_chk_addr(net, &p->laddr, ldev, 0))) &&
-		    likely(!ipv6_chk_addr(net, &p->raddr, NULL, 0)))
+		if ((ipv6_addr_is_multicast(laddr) ||
+		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+		    likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
 			ret = 1;
-
 	}
 	return ret;
 }
@@ -740,7 +766,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 			goto discard;
 		}
 
-		if (!ip6_tnl_rcv_ctl(t)) {
+		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
 			t->dev->stats.rx_dropped++;
 			rcu_read_unlock();
 			goto discard;
@@ -1114,25 +1140,6 @@ tx_err:
 	return NETDEV_TX_OK;
 }
 
-static void ip6_tnl_set_cap(struct ip6_tnl *t)
-{
-	struct ip6_tnl_parm *p = &t->parms;
-	int ltype = ipv6_addr_type(&p->laddr);
-	int rtype = ipv6_addr_type(&p->raddr);
-
-	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
-
-	if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
-	    rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
-	    !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
-	    (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
-		if (ltype&IPV6_ADDR_UNICAST)
-			p->flags |= IP6_TNL_F_CAP_XMIT;
-		if (rtype&IPV6_ADDR_UNICAST)
-			p->flags |= IP6_TNL_F_CAP_RCV;
-	}
-}
-
 static void ip6_tnl_link_config(struct ip6_tnl *t)
 {
 	struct net_device *dev = t->dev;
@@ -1153,7 +1160,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
 		fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
 
-	ip6_tnl_set_cap(t);
+	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET);
+	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr);
 
 	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
 		dev->flags |= IFF_POINTOPOINT;
@@ -1438,6 +1446,9 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
 
 	t->parms.proto = IPPROTO_IPV6;
 	dev_hold(dev);
+
+	ip6_tnl_link_config(t);
+
 	rcu_assign_pointer(ip6n->tnls_wc[0], t);
 	return 0;
 }

-- 
Phil Dibowitz                             phil@ipom.com
Open Source software and tech docs        Insanity Palace of Metallica
http://www.phildev.net/                   http://www.ipom.com/

"Be who you are and say what you feel, because those who mind don't matter
 and those who matter don't mind."
 - Dr. Seuss

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks
  2012-06-05 15:40 [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks Phil Dibowitz
@ 2012-06-07 21:53 ` David Miller
  2012-06-07 22:46   ` Phil Dibowitz
  2012-06-21  4:04 ` David Miller
  1 sibling, 1 reply; 6+ messages in thread
From: David Miller @ 2012-06-07 21:53 UTC (permalink / raw)
  To: phil; +Cc: netdev, phild

From: Phil Dibowitz <phil@ipom.com>
Date: Tue, 5 Jun 2012 08:40:58 -0700

> and I want to make sure it makes it upstream by World v6 Launch Day.

Submitting a patch just a day or two beforehand is absolutely not the
way to achieve this.

It's a pointless deadline anyways, even if I put it in today it would
be months to years before the majority of real users actually have it
running on their machines.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks
  2012-06-07 21:53 ` David Miller
@ 2012-06-07 22:46   ` Phil Dibowitz
  0 siblings, 0 replies; 6+ messages in thread
From: Phil Dibowitz @ 2012-06-07 22:46 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, phild

On Thu, Jun 07, 2012 at 02:53:12PM -0700, David Miller wrote:
> From: Phil Dibowitz <phil@ipom.com>
> Date: Tue, 5 Jun 2012 08:40:58 -0700
> 
> > and I want to make sure it makes it upstream by World v6 Launch Day.
> 
> Submitting a patch just a day or two beforehand is absolutely not the
> way to achieve this.

My wording was poor. I didn't expect it to be in the kernel, I just wanted to
at least get it out the door by then.

-- 
Phil Dibowitz                             phil@ipom.com
Open Source software and tech docs        Insanity Palace of Metallica
http://www.phildev.net/                   http://www.ipom.com/

"Be who you are and say what you feel, because those who mind don't matter
 and those who matter don't mind."
 - Dr. Seuss

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks
  2012-06-05 15:40 [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks Phil Dibowitz
  2012-06-07 21:53 ` David Miller
@ 2012-06-21  4:04 ` David Miller
  2012-06-26  3:37   ` Phil Dibowitz
  1 sibling, 1 reply; 6+ messages in thread
From: David Miller @ 2012-06-21  4:04 UTC (permalink / raw)
  To: phil; +Cc: netdev, phild

From: Phil Dibowitz <phil@ipom.com>
Date: Tue, 5 Jun 2012 08:40:58 -0700

> From b413062771afbba064ae9bc49b5daed7abb1243d Mon Sep 17 00:00:00 2001
> From: Ville Nuorvala <ville.nuorvala@gmail.com>
> Subject: [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks
> 
> The same IPv6 address checks are performed as with any normal tunnel,
> but as the fallback tunnel endpoint addresses are unspecified, the checks
> must be performed on a per-packet basis, rather than at tunnel
> configuration time.
> 
> Signed-off-by: Ville Nuorvala <ville.nuorvala@gmail.com>
> Tested-by: Phil Dibowitz <phil@ipom.com>

I've reviewed this change but I still have no idea why it's
necessary.

You need to compose a more lengthy and detailed commit log message
explaining everything before I'm going to consider applying this
patch.

You can't just say "we have some problem at Facebook, this patch fixes
it", and then merely describe word by word the content of the patch
without explaining the "why".  That simply doesn't cut it.

Thanks.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks
  2012-06-21  4:04 ` David Miller
@ 2012-06-26  3:37   ` Phil Dibowitz
  2012-06-29  1:09     ` David Miller
  0 siblings, 1 reply; 6+ messages in thread
From: Phil Dibowitz @ 2012-06-26  3:37 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, phild

[-- Attachment #1: Type: text/plain, Size: 2435 bytes --]

On 06/20/2012 09:04 PM, David Miller wrote:
> From: Phil Dibowitz <phil@ipom.com>
> Date: Tue, 5 Jun 2012 08:40:58 -0700
> 
>> From b413062771afbba064ae9bc49b5daed7abb1243d Mon Sep 17 00:00:00 2001
>> From: Ville Nuorvala <ville.nuorvala@gmail.com>
>> Subject: [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks
>>
>> The same IPv6 address checks are performed as with any normal tunnel,
>> but as the fallback tunnel endpoint addresses are unspecified, the checks
>> must be performed on a per-packet basis, rather than at tunnel
>> configuration time.
>>
>> Signed-off-by: Ville Nuorvala <ville.nuorvala@gmail.com>
>> Tested-by: Phil Dibowitz <phil@ipom.com>
> 
> I've reviewed this change but I still have no idea why it's
> necessary.
> 
> You need to compose a more lengthy and detailed commit log message
> explaining everything before I'm going to consider applying this
> patch.
> 
> You can't just say "we have some problem at Facebook, this patch fixes
> it", and then merely describe word by word the content of the patch
> without explaining the "why".  That simply doesn't cut it.

Sure. Sorry, I just kept Ville's patch description.

We do Layer-3 DSR via IP-in-IP tunneling. Our load balancers wrap an extra IP
header on incoming packets so they can be routed to the backend. In the v4
tunnel driver, when these packets fall on the default tunl0 device, the
behavior is to decapsulate them and drop them back on the stack. So our setup
is that tunl0 has the VIP and eth0 has (obviously) the backend's real address.

In IPv6 we do the same thing, but the v6 tunnel driver didn't have this same
behavior - if you didn't have an explicit tunnel setup, it would drop the packet.

This patch brings that v4 feature to the v6 driver.

I think that's the level of detail you're looking for, but I'm happy to expand
on anything in particular. I also break this down in tons of detail here:

https://www.facebook.com/notes/facebook-engineering/under-the-hood-network-implementation-for-world-ipv6-launch/10150873176303920

-- 
Phil Dibowitz                             phil@ipom.com
Open Source software and tech docs        Insanity Palace of Metallica
http://www.phildev.net/                   http://www.ipom.com/

"Be who you are and say what you feel, because those who mind don't matter
 and those who matter don't mind."
 - Dr. Seuss



[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 262 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks
  2012-06-26  3:37   ` Phil Dibowitz
@ 2012-06-29  1:09     ` David Miller
  0 siblings, 0 replies; 6+ messages in thread
From: David Miller @ 2012-06-29  1:09 UTC (permalink / raw)
  To: phil; +Cc: netdev, phild

From: Phil Dibowitz <phil@ipom.com>
Date: Mon, 25 Jun 2012 20:37:35 -0700

> Sure. Sorry, I just kept Ville's patch description.
> 
> We do Layer-3 DSR via IP-in-IP tunneling. Our load balancers wrap an extra IP
> header on incoming packets so they can be routed to the backend. In the v4
> tunnel driver, when these packets fall on the default tunl0 device, the
> behavior is to decapsulate them and drop them back on the stack. So our setup
> is that tunl0 has the VIP and eth0 has (obviously) the backend's real address.
> 
> In IPv6 we do the same thing, but the v6 tunnel driver didn't have this same
> behavior - if you didn't have an explicit tunnel setup, it would drop the packet.
> 
> This patch brings that v4 feature to the v6 driver.
> 
> I think that's the level of detail you're looking for, but I'm happy to expand
> on anything in particular. I also break this down in tons of detail here:

This is a lot better, please resubmit the patch with a proper verbose
commit message, and please submit it properly and without any unrelated
content in the message body as described in Documentation/SubmittingPatches

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-06-29  1:09 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-06-05 15:40 [PATCH] Allow receiving packets on the fallback tunnel if they pass sanity checks Phil Dibowitz
2012-06-07 21:53 ` David Miller
2012-06-07 22:46   ` Phil Dibowitz
2012-06-21  4:04 ` David Miller
2012-06-26  3:37   ` Phil Dibowitz
2012-06-29  1:09     ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).