netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH][IPSEC][4/7] inter address family ipsec tunnel
@ 2006-11-24  5:38 Kazunori MIYAZAWA
  2006-12-01  0:48 ` David Miller
  0 siblings, 1 reply; 9+ messages in thread
From: Kazunori MIYAZAWA @ 2006-11-24  5:38 UTC (permalink / raw)
  To: Miika Komu, Diego Beltrami, Herbert Xu, David Miller; +Cc: netdev, usagi-core

This patch adds IPv6 over IPv4 IPsec tunnel.

Signed-off-by: Miika Komu <miika@iki.fi>
Signed-off-by: Diego Beltrami <Diego.Beltrami@hiit.fi>
Signed-off-by: Kazunori Miyazawa <miyazawa@linux-ipv6.org>

---
 net/ipv4/xfrm4_mode_tunnel.c |   57 +++++++++++++++++++++++++++++-------
 net/ipv4/xfrm4_policy.c      |   66 +++++++++++++++++++++++++++++++++---------
 2 files changed, 98 insertions(+), 25 deletions(-)

diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index e23c21d..e54c549 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -23,6 +23,12 @@ static inline void ipip_ecn_decapsulate(
 		IP_ECN_set_ce(inner_iph);
 }
 
+static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
+{
+	if (INET_ECN_is_ce(iph->tos))
+		IP6_ECN_set_ce(skb->nh.ipv6h);
+}
+
 /* Add encapsulation header.
  *
  * The top IP header will be constructed per RFC 2401.  The following fields
@@ -36,6 +42,7 @@ static inline void ipip_ecn_decapsulate(
 static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
+	struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
 	struct iphdr *iph, *top_iph;
 	int flags;
 
@@ -48,15 +55,27 @@ static int xfrm4_tunnel_output(struct xf
 	top_iph->ihl = 5;
 	top_iph->version = 4;
 
+	flags = x->props.flags;
+
 	/* DS disclosed */
-	top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
+	if (xdst->route->ops->family == AF_INET) {
+		top_iph->protocol = IPPROTO_IPIP;
+		top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
+		top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
+			0 : (iph->frag_off & htons(IP_DF));
+	}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	else {
+		struct ipv6hdr *ipv6h = (struct ipv6hdr*)iph;
+		top_iph->protocol = IPPROTO_IPV6;
+		top_iph->tos = INET_ECN_encapsulate(iph->tos, ipv6_get_dsfield(ipv6h));
+		top_iph->frag_off = 0;
+	}
+#endif
 
-	flags = x->props.flags;
 	if (flags & XFRM_STATE_NOECN)
 		IP_ECN_clear(top_iph);
 
-	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
-		0 : (iph->frag_off & htons(IP_DF));
 	if (!top_iph->frag_off)
 		__ip_select_ident(top_iph, dst->child, 0);
 
@@ -64,7 +83,6 @@ static int xfrm4_tunnel_output(struct xf
 
 	top_iph->saddr = x->props.saddr.a4;
 	top_iph->daddr = x->id.daddr.a4;
-	top_iph->protocol = IPPROTO_IPIP;
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
 	return 0;
@@ -75,8 +93,16 @@ static int xfrm4_tunnel_input(struct xfr
 	struct iphdr *iph = skb->nh.iph;
 	int err = -EINVAL;
 
-	if (iph->protocol != IPPROTO_IPIP)
-		goto out;
+	switch(iph->protocol){
+		case IPPROTO_IPIP:
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		case IPPROTO_IPV6:
+			break;
+#endif
+		default:
+			goto out;
+	}
+
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto out;
 
@@ -84,10 +110,19 @@ static int xfrm4_tunnel_input(struct xfr
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;
 
-	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-		ipv4_copy_dscp(iph, skb->h.ipiph);
-	if (!(x->props.flags & XFRM_STATE_NOECN))
-		ipip_ecn_decapsulate(skb);
+	if (iph->protocol == IPPROTO_IPIP) {
+		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+			ipv4_copy_dscp(iph, skb->h.ipiph);
+		if (!(x->props.flags & XFRM_STATE_NOECN))
+			ipip_ecn_decapsulate(skb);
+	}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	else {
+		if (!(x->props.flags & XFRM_STATE_NOECN))
+			ipip6_ecn_decapsulate(iph, skb);
+		skb->protocol = htons(ETH_P_IPV6);
+	}
+#endif
 	skb->mac.raw = memmove(skb->data - skb->mac_len,
 			       skb->mac.raw, skb->mac_len);
 	skb->nh.raw = skb->data;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d4107bb..992f484 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -72,17 +72,20 @@ __xfrm4_bundle_create(struct xfrm_policy
 	struct dst_entry *dst, *dst_prev;
 	struct rtable *rt0 = (struct rtable*)(*dst_p);
 	struct rtable *rt = rt0;
-	__be32 remote = fl->fl4_dst;
-	__be32 local  = fl->fl4_src;
 	struct flowi fl_tunnel = {
 		.nl_u = {
 			.ip4_u = {
-				.saddr = local,
-				.daddr = remote,
+				.saddr = fl->fl4_src,
+				.daddr = fl->fl4_dst,
 				.tos = fl->fl4_tos
 			}
 		}
 	};
+	union {
+		struct in6_addr *in6;
+		struct in_addr *in;
+	} remote, local;
+	unsigned short encap_family = 0;
 	int i;
 	int err;
 	int header_len = 0;
@@ -94,7 +97,6 @@ __xfrm4_bundle_create(struct xfrm_policy
 	for (i = 0; i < nx; i++) {
 		struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
 		struct xfrm_dst *xdst;
-		int tunnel = 0;
 
 		if (unlikely(dst1 == NULL)) {
 			err = -ENOBUFS;
@@ -116,21 +118,52 @@ __xfrm4_bundle_create(struct xfrm_policy
 
 		dst1->next = dst_prev;
 		dst_prev = dst1;
-		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
-			remote = xfrm[i]->id.daddr.a4;
-			local  = xfrm[i]->props.saddr.a4;
-			tunnel = 1;
+
+		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
+			encap_family = xfrm[i]->props.family;
+
+			switch(encap_family) {
+			case AF_INET:
+				remote.in = (struct in_addr*)&xfrm[i]->id.daddr;
+				local.in = (struct in_addr*)&xfrm[i]->props.saddr;
+				break;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+			case AF_INET6:
+				remote.in6 = (struct in6_addr*)&xfrm[i]->id.daddr;
+				local.in6 = (struct in6_addr*)&xfrm[i]->props.saddr;
+				break;
+#endif
+			default:
+				BUG_ON(1);
+			}
 		}
 		header_len += xfrm[i]->props.header_len;
 		trailer_len += xfrm[i]->props.trailer_len;
 
-		if (tunnel) {
-			fl_tunnel.fl4_src = local;
-			fl_tunnel.fl4_dst = remote;
+		if (encap_family) {
+			switch(encap_family) {
+			case AF_INET:
+				fl_tunnel.fl4_dst = remote.in->s_addr;
+				fl_tunnel.fl4_src = local.in->s_addr;
+				break;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+			case AF_INET6:
+				ipv6_addr_copy(&fl_tunnel.fl6_dst, remote.in6);
+				ipv6_addr_copy(&fl_tunnel.fl6_src, local.in6);
+				break;
+#endif
+			default:
+				BUG_ON(1);
+			}
 			err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
-					      &fl_tunnel, AF_INET);
+					      &fl_tunnel, encap_family);
 			if (err)
 				goto error;
+			/* Without this, the atomic inc below segfaults */
+			if (encap_family == AF_INET6) {
+				rt->peer = NULL;
+				rt_bind_peer(rt,1);
+			}
 		} else
 			dst_hold(&rt->u.dst);
 	}
@@ -162,7 +195,12 @@ __xfrm4_bundle_create(struct xfrm_policy
 		/* Copy neighbout for reachability confirmation */
 		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input		= rt->u.dst.input;
-		dst_prev->output	= xfrm4_output;
+		if (dst_prev->xfrm->props.family == AF_INET)
+			dst_prev->output = xfrm4_output;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		else
+			dst_prev->output = xfrm6_output;
+#endif
 		if (rt->peer)
 			atomic_inc(&rt->peer->refcnt);
 		x->u.rt.peer = rt->peer;
-- 

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH][IPSEC][4/7] inter address family ipsec tunnel
  2006-11-24  5:38 [PATCH][IPSEC][4/7] inter address family ipsec tunnel Kazunori MIYAZAWA
@ 2006-12-01  0:48 ` David Miller
  2006-12-01  4:41   ` Kazunori MIYAZAWA
  0 siblings, 1 reply; 9+ messages in thread
From: David Miller @ 2006-12-01  0:48 UTC (permalink / raw)
  To: kazunori; +Cc: miika, Diego.Beltrami, herbert, netdev, usagi-core

From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
Date: Fri, 24 Nov 2006 14:38:39 +0900

What is going on here?

> +			/* Without this, the atomic inc below segfaults */
> +			if (encap_family == AF_INET6) {
> +				rt->peer = NULL;
> +				rt_bind_peer(rt,1);
> +			}
 ...
> -		dst_prev->output	= xfrm4_output;
> +		if (dst_prev->xfrm->props.family == AF_INET)
> +			dst_prev->output = xfrm4_output;
> +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
> +		else
> +			dst_prev->output = xfrm6_output;
> +#endif
>  		if (rt->peer)
>  			atomic_inc(&rt->peer->refcnt);

If it's non-NULL and you get a segfault for atomic_inc() that
means there is garbage here, and it seems that if you're
setting it to NULL explicitly then it's just a workaround
for whatever problem is causing it to be non-NULL to begin
with.

What is putting a non-valid pointer value there?  Is this an IPV6 or
IPSEC dst route by chance?  If so, that makes this change really
wrong, and we are corrupting the route by running rt_bind_peer() on
it.  rt_bind_peer() is only valid on ipv4 route entries.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][IPSEC][4/7] inter address family ipsec tunnel
  2006-12-01  0:48 ` David Miller
@ 2006-12-01  4:41   ` Kazunori MIYAZAWA
  2006-12-06 11:35     ` Kazunori MIYAZAWA
  0 siblings, 1 reply; 9+ messages in thread
From: Kazunori MIYAZAWA @ 2006-12-01  4:41 UTC (permalink / raw)
  To: David Miller; +Cc: miika, Diego.Beltrami, herbert, netdev, usagi-core

David Miller wrote:
> From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
> Date: Fri, 24 Nov 2006 14:38:39 +0900
> 
> What is going on here?
> 
>> +			/* Without this, the atomic inc below segfaults */
>> +			if (encap_family == AF_INET6) {
>> +				rt->peer = NULL;
>> +				rt_bind_peer(rt,1);
>> +			}
>  ...
>> -		dst_prev->output	= xfrm4_output;
>> +		if (dst_prev->xfrm->props.family == AF_INET)
>> +			dst_prev->output = xfrm4_output;
>> +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
>> +		else
>> +			dst_prev->output = xfrm6_output;
>> +#endif
>>  		if (rt->peer)
>>  			atomic_inc(&rt->peer->refcnt);
> 
> If it's non-NULL and you get a segfault for atomic_inc() that
> means there is garbage here, and it seems that if you're
> setting it to NULL explicitly then it's just a workaround
> for whatever problem is causing it to be non-NULL to begin
> with.
> 
> What is putting a non-valid pointer value there?  Is this an IPV6 or
> IPSEC dst route by chance?  If so, that makes this change really
> wrong, and we are corrupting the route by running rt_bind_peer() on
> it.  rt_bind_peer() is only valid on ipv4 route entries.

Thank you for your good catch.
I think atomic_inc must be done in case of props.family == AF_INET.
And we probably should manage reference count of the device in case of
AF_INET6.

Anyway I'll check and fix it.

Thank you.

--
Kazunori Miyazawa

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][IPSEC][4/7] inter address family ipsec tunnel
  2006-12-01  4:41   ` Kazunori MIYAZAWA
@ 2006-12-06 11:35     ` Kazunori MIYAZAWA
  2006-12-07  7:37       ` David Miller
  0 siblings, 1 reply; 9+ messages in thread
From: Kazunori MIYAZAWA @ 2006-12-06 11:35 UTC (permalink / raw)
  To: Kazunori MIYAZAWA
  Cc: davem, miika, Diego.Beltrami, herbert, netdev, usagi-core

On Fri, 01 Dec 2006 13:41:39 +0900
Kazunori MIYAZAWA <kazunori@miyazawa.org> wrote:

> David Miller wrote:
> > From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
> > Date: Fri, 24 Nov 2006 14:38:39 +0900
> > 
> > What is going on here?
> > 
> >> +			/* Without this, the atomic inc below segfaults */
> >> +			if (encap_family == AF_INET6) {
> >> +				rt->peer = NULL;
> >> +				rt_bind_peer(rt,1);
> >> +			}
> >  ...
> >> -		dst_prev->output	= xfrm4_output;
> >> +		if (dst_prev->xfrm->props.family == AF_INET)
> >> +			dst_prev->output = xfrm4_output;
> >> +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
> >> +		else
> >> +			dst_prev->output = xfrm6_output;
> >> +#endif
> >>  		if (rt->peer)
> >>  			atomic_inc(&rt->peer->refcnt);
> > 
> > If it's non-NULL and you get a segfault for atomic_inc() that
> > means there is garbage here, and it seems that if you're
> > setting it to NULL explicitly then it's just a workaround
> > for whatever problem is causing it to be non-NULL to begin
> > with.
> > 
> > What is putting a non-valid pointer value there?  Is this an IPV6 or
> > IPSEC dst route by chance?  If so, that makes this change really
> > wrong, and we are corrupting the route by running rt_bind_peer() on
> > it.  rt_bind_peer() is only valid on ipv4 route entries.
> 
> Thank you for your good catch.
> I think atomic_inc must be done in case of props.family == AF_INET.
> And we probably should manage reference count of the device in case of
> AF_INET6.
> 
> Anyway I'll check and fix it.
> 
> Thank you.
> 

Hello David,

Sorry, I mixed up changes in the patches. I described that [4/7] will add
"IPv6 over IPv4 IPsec tunnel". However I send a patch for "IPv4 over IPv6"
as a reply because it includes the discussing item.
So this patch adds IPv4 over IPv6 IPsec tunnel. It's complicated.

I deleted subustituting NULL for rt->peer and moved atomic_inc stuff
under the "if" section.

BTW, I have a question about descrementing the reference count of rt->peer.
The reference cound in normal "dst" structure is decremented by calling
inet_putpeer from ipv4_dst_destroy. But xfrm4_dst_destroy does not call inet_putpeer.
Where do we decrement the count? Should xfrm4_dst_destroy do that?

Signed-off-by: Miika Komu <miika@iki.fi>
Signed-off-by: Diego Beltrami <Diego.Beltrami@hiit.fi>
Signed-off-by: Kazunori Miyazawa <miyazawa@linux-ipv6.org>


---
 net/ipv4/xfrm4_policy.c      |   68 ++++++++++++++++++++++++++++++++----------
 net/ipv6/xfrm6_mode_tunnel.c |   42 ++++++++++++++++++++------
 2 files changed, 83 insertions(+), 27 deletions(-)

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d4107bb..37b14e8 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -72,17 +72,20 @@ __xfrm4_bundle_create(struct xfrm_policy
 	struct dst_entry *dst, *dst_prev;
 	struct rtable *rt0 = (struct rtable*)(*dst_p);
 	struct rtable *rt = rt0;
-	__be32 remote = fl->fl4_dst;
-	__be32 local  = fl->fl4_src;
 	struct flowi fl_tunnel = {
 		.nl_u = {
 			.ip4_u = {
-				.saddr = local,
-				.daddr = remote,
+				.saddr = fl->fl4_src,
+				.daddr = fl->fl4_dst,
 				.tos = fl->fl4_tos
 			}
 		}
 	};
+	union {
+		struct in6_addr *in6;
+		struct in_addr *in;
+	} remote, local;
+	unsigned short encap_family = 0;
 	int i;
 	int err;
 	int header_len = 0;
@@ -94,7 +97,6 @@ __xfrm4_bundle_create(struct xfrm_policy
 	for (i = 0; i < nx; i++) {
 		struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
 		struct xfrm_dst *xdst;
-		int tunnel = 0;
 
 		if (unlikely(dst1 == NULL)) {
 			err = -ENOBUFS;
@@ -116,19 +118,45 @@ __xfrm4_bundle_create(struct xfrm_policy
 
 		dst1->next = dst_prev;
 		dst_prev = dst1;
-		if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
-			remote = xfrm[i]->id.daddr.a4;
-			local  = xfrm[i]->props.saddr.a4;
-			tunnel = 1;
+
+		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
+			encap_family = xfrm[i]->props.family;
+
+			switch(encap_family) {
+			case AF_INET:
+				remote.in = (struct in_addr*)&xfrm[i]->id.daddr;
+				local.in = (struct in_addr*)&xfrm[i]->props.saddr;
+				break;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+			case AF_INET6:
+				remote.in6 = (struct in6_addr*)&xfrm[i]->id.daddr;
+				local.in6 = (struct in6_addr*)&xfrm[i]->props.saddr;
+				break;
+#endif
+			default:
+				BUG_ON(1);
+			}
 		}
 		header_len += xfrm[i]->props.header_len;
 		trailer_len += xfrm[i]->props.trailer_len;
 
-		if (tunnel) {
-			fl_tunnel.fl4_src = local;
-			fl_tunnel.fl4_dst = remote;
+		if (encap_family) {
+			switch(encap_family) {
+			case AF_INET:
+				fl_tunnel.fl4_dst = remote.in->s_addr;
+				fl_tunnel.fl4_src = local.in->s_addr;
+				break;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+			case AF_INET6:
+				ipv6_addr_copy(&fl_tunnel.fl6_dst, remote.in6);
+				ipv6_addr_copy(&fl_tunnel.fl6_src, local.in6);
+				break;
+#endif
+			default:
+				BUG_ON(1);
+			}
 			err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
-					      &fl_tunnel, AF_INET);
+					      &fl_tunnel, encap_family);
 			if (err)
 				goto error;
 		} else
@@ -162,10 +190,16 @@ __xfrm4_bundle_create(struct xfrm_policy
 		/* Copy neighbout for reachability confirmation */
 		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
 		dst_prev->input		= rt->u.dst.input;
-		dst_prev->output	= xfrm4_output;
-		if (rt->peer)
-			atomic_inc(&rt->peer->refcnt);
-		x->u.rt.peer = rt->peer;
+		if (dst_prev->xfrm->props.family == AF_INET) {
+			dst_prev->output = xfrm4_output;
+			if (rt->peer)
+				atomic_inc(&rt->peer->refcnt);
+			x->u.rt.peer = rt->peer;
+		}
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+		else
+			dst_prev->output = xfrm6_output;
+#endif
 		/* Sheit... I remember I did this right. Apparently,
 		 * it was magically lost, so this code needs audit */
 		x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 5e7d8a7..51eb59d 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -25,6 +25,12 @@ static inline void ipip6_ecn_decapsulate
 		IP6_ECN_set_ce(inner_iph);
 }
 
+static inline void ip6ip_ecn_decapsulate(struct sk_buff *skb)
+{
+	if (INET_ECN_is_ce(ipv6_get_dsfield(skb->nh.ipv6h)))
+		IP_ECN_set_ce(skb->h.ipiph);
+}
+
 /* Add encapsulation header.
  *
  * The top IP header will be constructed per RFC 2401.  The following fields
@@ -40,6 +46,7 @@ static inline void ipip6_ecn_decapsulate
 static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb->dst;
+	struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
 	struct ipv6hdr *iph, *top_iph;
 	int dsfield;
 
@@ -52,16 +59,24 @@ static int xfrm6_tunnel_output(struct xf
 	skb->h.ipv6h = top_iph + 1;
 
 	top_iph->version = 6;
-	top_iph->priority = iph->priority;
-	top_iph->flow_lbl[0] = iph->flow_lbl[0];
-	top_iph->flow_lbl[1] = iph->flow_lbl[1];
-	top_iph->flow_lbl[2] = iph->flow_lbl[2];
+	if (xdst->route->ops->family == AF_INET6) {
+		top_iph->priority = iph->priority;
+		top_iph->flow_lbl[0] = iph->flow_lbl[0];
+		top_iph->flow_lbl[1] = iph->flow_lbl[1];
+		top_iph->flow_lbl[2] = iph->flow_lbl[2];
+		top_iph->nexthdr = IPPROTO_IPV6; 
+	} else {
+		top_iph->priority = 0;
+		top_iph->flow_lbl[0] = 0;
+		top_iph->flow_lbl[1] = 0;
+		top_iph->flow_lbl[2] = 0;
+		top_iph->nexthdr = IPPROTO_IPIP;
+	}
 	dsfield = ipv6_get_dsfield(top_iph);
 	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
 	if (x->props.flags & XFRM_STATE_NOECN)
 		dsfield &= ~INET_ECN_MASK;
 	ipv6_change_dsfield(top_iph, 0, dsfield);
-	top_iph->nexthdr = IPPROTO_IPV6; 
 	top_iph->hop_limit = dst_metric(dst->child, RTAX_HOPLIMIT);
 	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
 	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
@@ -72,7 +87,8 @@ static int xfrm6_tunnel_input(struct xfr
 {
 	int err = -EINVAL;
 
-	if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6)
+	if (skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPV6
+	    && skb->nh.raw[IP6CB(skb)->nhoff] != IPPROTO_IPIP)
 		goto out;
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto out;
@@ -81,10 +97,16 @@ static int xfrm6_tunnel_input(struct xfr
 	    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
 		goto out;
 
-	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
-		ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
-	if (!(x->props.flags & XFRM_STATE_NOECN))
-		ipip6_ecn_decapsulate(skb);
+	if (skb->nh.raw[IP6CB(skb)->nhoff] == IPPROTO_IPV6) {
+		if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+			ipv6_copy_dscp(skb->nh.ipv6h, skb->h.ipv6h);
+		if (!(x->props.flags & XFRM_STATE_NOECN))
+			ipip6_ecn_decapsulate(skb);
+	} else {
+		if (!(x->props.flags & XFRM_STATE_NOECN))
+			ip6ip_ecn_decapsulate(skb);
+		skb->protocol = htons(ETH_P_IP);
+	}
 	skb->mac.raw = memmove(skb->data - skb->mac_len,
 			       skb->mac.raw, skb->mac_len);
 	skb->nh.raw = skb->data;
-- 
1.4.1






^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH][IPSEC][4/7] inter address family ipsec tunnel
  2006-12-06 11:35     ` Kazunori MIYAZAWA
@ 2006-12-07  7:37       ` David Miller
  2006-12-07  7:40         ` David Miller
  2006-12-07  7:48         ` David Miller
  0 siblings, 2 replies; 9+ messages in thread
From: David Miller @ 2006-12-07  7:37 UTC (permalink / raw)
  To: kazunori; +Cc: miika, Diego.Beltrami, herbert, netdev, usagi-core

From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
Date: Wed, 6 Dec 2006 20:35:37 +0900

> Sorry, I mixed up changes in the patches. I described that [4/7] will add
> "IPv6 over IPv4 IPsec tunnel". However I send a patch for "IPv4 over IPv6"
> as a reply because it includes the discussing item.
> So this patch adds IPv4 over IPv6 IPsec tunnel. It's complicated.
> 
> I deleted subustituting NULL for rt->peer and moved atomic_inc stuff
> under the "if" section.

I have applied this patch, thanks for fixing it up.

> BTW, I have a question about descrementing the reference count of
> rt->peer.  The reference cound in normal "dst" structure is
> decremented by calling inet_putpeer from ipv4_dst_destroy. But
> xfrm4_dst_destroy does not call inet_putpeer.  Where do we decrement
> the count? Should xfrm4_dst_destroy do that?

Indeed, it is a real leak.  And yes, I believe that xfrm4_dst_destroy()
should release it.  I will make this fix, thank you.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][IPSEC][4/7] inter address family ipsec tunnel
  2006-12-07  7:37       ` David Miller
@ 2006-12-07  7:40         ` David Miller
  2006-12-07  7:48         ` David Miller
  1 sibling, 0 replies; 9+ messages in thread
From: David Miller @ 2006-12-07  7:40 UTC (permalink / raw)
  To: kazunori; +Cc: miika, Diego.Beltrami, herbert, netdev, usagi-core

From: David Miller <davem@davemloft.net>
Date: Wed, 06 Dec 2006 23:37:49 -0800 (PST)

> From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
> Date: Wed, 6 Dec 2006 20:35:37 +0900
> 
> > Sorry, I mixed up changes in the patches. I described that [4/7] will add
> > "IPv6 over IPv4 IPsec tunnel". However I send a patch for "IPv4 over IPv6"
> > as a reply because it includes the discussing item.
> > So this patch adds IPv4 over IPv6 IPsec tunnel. It's complicated.
> > 
> > I deleted subustituting NULL for rt->peer and moved atomic_inc stuff
> > under the "if" section.
> 
> I have applied this patch, thanks for fixing it up.

Actually, I have to revert this change again, it still has a problem.
Sorry :-/

I very much feared the following kind of obstacle with these changes.
Specifically, references to modular IPV6 code from statically compiled
IPV4 code.  Which produces the following build error:

net/built-in.o: In function `__xfrm4_bundle_create':xfrm4_policy.c:(.text+0x59a88): undefined reference to `xfrm6_output'
:xfrm4_policy.c:(.text+0x59ac0): undefined reference to `xfrm6_output'

Please test with IPV6 built modular in the future, thank you.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][IPSEC][4/7] inter address family ipsec tunnel
  2006-12-07  7:37       ` David Miller
  2006-12-07  7:40         ` David Miller
@ 2006-12-07  7:48         ` David Miller
  2006-12-07 11:23           ` Kazunori MIYAZAWA
  1 sibling, 1 reply; 9+ messages in thread
From: David Miller @ 2006-12-07  7:48 UTC (permalink / raw)
  To: kazunori; +Cc: miika, Diego.Beltrami, herbert, netdev, usagi-core

From: David Miller <davem@davemloft.net>
Date: Wed, 06 Dec 2006 23:37:49 -0800 (PST)

> From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
> Date: Wed, 6 Dec 2006 20:35:37 +0900
> 
> > BTW, I have a question about descrementing the reference count of
> > rt->peer.  The reference cound in normal "dst" structure is
> > decremented by calling inet_putpeer from ipv4_dst_destroy. But
> > xfrm4_dst_destroy does not call inet_putpeer.  Where do we decrement
> > the count? Should xfrm4_dst_destroy do that?
> 
> Indeed, it is a real leak.  And yes, I believe that xfrm4_dst_destroy()
> should release it.  I will make this fix, thank you.

For reference, this is the fix I checked in.

Thanks again for spotting this problem.

I suppose your patch will need to add an address family check for this
too.  Actually... I think address family check is needed for 'idev'
release in xfrm4_dst_destroy() too, if you agree please also add that
fix to your patch.

It is very complicated, using IPV6 route in xfrm4 code, because now
all "X->u.rt" references need to be audited.

commit 26db167702756d0022f8ea5f1f30cad3018cfe31
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Wed Dec 6 23:45:15 2006 -0800

    [IPSEC]: Fix inetpeer leak in ipv4 xfrm dst entries.
    
    We grab a reference to the route's inetpeer entry but
    forget to release it in xfrm4_dst_destroy().
    
    Bug discovered by Kazunori MIYAZAWA <kazunori@miyazawa.org>
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index d4107bb..fb9f69c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -274,6 +274,8 @@ static void xfrm4_dst_destroy(struct dst
 
 	if (likely(xdst->u.rt.idev))
 		in_dev_put(xdst->u.rt.idev);
+	if (likely(xdst->u.rt.peer))
+		inet_putpeer(xdst->u.rt.peer);
 	xfrm_dst_destroy(xdst);
 }
 

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH][IPSEC][4/7] inter address family ipsec tunnel
  2006-12-07  7:48         ` David Miller
@ 2006-12-07 11:23           ` Kazunori MIYAZAWA
  2006-12-07 22:03             ` David Miller
  0 siblings, 1 reply; 9+ messages in thread
From: Kazunori MIYAZAWA @ 2006-12-07 11:23 UTC (permalink / raw)
  To: David Miller; +Cc: miika, Diego.Beltrami, herbert, netdev, usagi-core

David Miller wrote:
> From: David Miller <davem@davemloft.net>
> Date: Wed, 06 Dec 2006 23:37:49 -0800 (PST)
> 
>> From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
>> Date: Wed, 6 Dec 2006 20:35:37 +0900
>>
>>> BTW, I have a question about descrementing the reference count of
>>> rt->peer.  The reference cound in normal "dst" structure is
>>> decremented by calling inet_putpeer from ipv4_dst_destroy. But
>>> xfrm4_dst_destroy does not call inet_putpeer.  Where do we decrement
>>> the count? Should xfrm4_dst_destroy do that?
>> Indeed, it is a real leak.  And yes, I believe that xfrm4_dst_destroy()
>> should release it.  I will make this fix, thank you.
> 
> For reference, this is the fix I checked in.
> 
> Thanks again for spotting this problem.
> 

Thank you for making the patch.
Will it be merged to 2.6.19.x?

> I suppose your patch will need to add an address family check for this
> too.  Actually... I think address family check is needed for 'idev'
> release in xfrm4_dst_destroy() too, if you agree please also add that
> fix to your patch.
> 

Yes, I will add address family check for your patch.

Umm, I guess we don't need address family check because xdst
is allocated with xfrm4_dst_ops and the family of rt0 is
always equal to the original address family.
They are always AF_INET in this case.

I will try to fix the unresolved symbol issue.

> It is very complicated, using IPV6 route in xfrm4 code, because now
> all "X->u.rt" references need to be audited.
> 
> commit 26db167702756d0022f8ea5f1f30cad3018cfe31
> Author: David S. Miller <davem@sunset.davemloft.net>
> Date:   Wed Dec 6 23:45:15 2006 -0800
> 
>     [IPSEC]: Fix inetpeer leak in ipv4 xfrm dst entries.
>     
>     We grab a reference to the route's inetpeer entry but
>     forget to release it in xfrm4_dst_destroy().
>     
>     Bug discovered by Kazunori MIYAZAWA <kazunori@miyazawa.org>
>     
>     Signed-off-by: David S. Miller <davem@davemloft.net>
> 
> diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
> index d4107bb..fb9f69c 100644
> --- a/net/ipv4/xfrm4_policy.c
> +++ b/net/ipv4/xfrm4_policy.c
> @@ -274,6 +274,8 @@ static void xfrm4_dst_destroy(struct dst
>  
>  	if (likely(xdst->u.rt.idev))
>  		in_dev_put(xdst->u.rt.idev);
> +	if (likely(xdst->u.rt.peer))
> +		inet_putpeer(xdst->u.rt.peer);
>  	xfrm_dst_destroy(xdst);
>  }
>  
> -
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Kazunori Miyazawa

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH][IPSEC][4/7] inter address family ipsec tunnel
  2006-12-07 11:23           ` Kazunori MIYAZAWA
@ 2006-12-07 22:03             ` David Miller
  0 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2006-12-07 22:03 UTC (permalink / raw)
  To: kazunori; +Cc: miika, Diego.Beltrami, herbert, netdev, usagi-core

From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
Date: Thu, 07 Dec 2006 20:23:48 +0900

> David Miller wrote:
> > From: David Miller <davem@davemloft.net>
> > Date: Wed, 06 Dec 2006 23:37:49 -0800 (PST)
> > 
> >> From: Kazunori MIYAZAWA <kazunori@miyazawa.org>
> >> Date: Wed, 6 Dec 2006 20:35:37 +0900
> >>
> >>> BTW, I have a question about descrementing the reference count of
> >>> rt->peer.  The reference cound in normal "dst" structure is
> >>> decremented by calling inet_putpeer from ipv4_dst_destroy. But
> >>> xfrm4_dst_destroy does not call inet_putpeer.  Where do we decrement
> >>> the count? Should xfrm4_dst_destroy do that?
> >> Indeed, it is a real leak.  And yes, I believe that xfrm4_dst_destroy()
> >> should release it.  I will make this fix, thank you.
> > 
> > For reference, this is the fix I checked in.
> > 
> > Thanks again for spotting this problem.
> 
> Thank you for making the patch.
> Will it be merged to 2.6.19.x?

Yes, I submitted it to -stable last night and Chris W. just
queued it up for the -stable branches (even 2.6.18.x etc.)

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2006-12-07 22:03 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-11-24  5:38 [PATCH][IPSEC][4/7] inter address family ipsec tunnel Kazunori MIYAZAWA
2006-12-01  0:48 ` David Miller
2006-12-01  4:41   ` Kazunori MIYAZAWA
2006-12-06 11:35     ` Kazunori MIYAZAWA
2006-12-07  7:37       ` David Miller
2006-12-07  7:40         ` David Miller
2006-12-07  7:48         ` David Miller
2006-12-07 11:23           ` Kazunori MIYAZAWA
2006-12-07 22:03             ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).