* [PATCH 1/2] ipv6: Fix IPsec slowpath fragmentation problem
2011-10-11 11:42 [PATCH net-next 0/2] ipv6: IPsec pmtu/fragmentation fixes Steffen Klassert
@ 2011-10-11 11:43 ` Steffen Klassert
2011-10-19 3:53 ` David Miller
2011-10-11 11:44 ` [PATCH 2/2] xfrm6: Don't call icmpv6_send on local error Steffen Klassert
1 sibling, 1 reply; 5+ messages in thread
From: Steffen Klassert @ 2011-10-11 11:43 UTC (permalink / raw)
To: David Miller; +Cc: Herbert Xu, netdev
ip6_append_data() builds packets based on the mtu from dst_mtu(rt->dst.path).
On IPsec the effective mtu is lower because we need to add the protocol
headers and trailers later when we do the IPsec transformations. So after
the IPsec transformations the packet might be too big, which leads to a
slowpath fragmentation then. This patch fixes this by building the packets
based on the lower IPsec mtu from dst_mtu(&rt->dst) and adapts the exthdr
handling to this.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv6/ip6_output.c | 18 ++++++++++++------
net/ipv6/raw.c | 3 +--
2 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 835c04b..1e20b64 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1193,6 +1193,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
struct sk_buff *skb;
unsigned int maxfraglen, fragheaderlen;
int exthdrlen;
+ int dst_exthdrlen;
int hh_len;
int mtu;
int copy;
@@ -1248,7 +1249,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
np->cork.hop_limit = hlimit;
np->cork.tclass = tclass;
mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
- rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+ rt->dst.dev->mtu : dst_mtu(&rt->dst);
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
@@ -1259,16 +1260,17 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
cork->length = 0;
sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0;
- exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
- rt->rt6i_nfheader_len;
+ exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
length += exthdrlen;
transhdrlen += exthdrlen;
+ dst_exthdrlen = rt->dst.header_len;
} else {
rt = (struct rt6_info *)cork->dst;
fl6 = &inet->cork.fl.u.ip6;
opt = np->cork.opt;
transhdrlen = 0;
exthdrlen = 0;
+ dst_exthdrlen = 0;
mtu = cork->fragsize;
}
@@ -1368,6 +1370,8 @@ alloc_new_skb:
else
alloclen = datalen + fragheaderlen;
+ alloclen += dst_exthdrlen;
+
/*
* The last fragment gets additional space at tail.
* Note: we overallocate on fragments with MSG_MODE
@@ -1419,9 +1423,9 @@ alloc_new_skb:
/*
* Find where to start putting bytes
*/
- data = skb_put(skb, fraglen);
- skb_set_network_header(skb, exthdrlen);
- data += fragheaderlen;
+ data = skb_put(skb, fraglen + dst_exthdrlen);
+ skb_set_network_header(skb, exthdrlen + dst_exthdrlen);
+ data += fragheaderlen + dst_exthdrlen;
skb->transport_header = (skb->network_header +
fragheaderlen);
if (fraggap) {
@@ -1434,6 +1438,7 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
copy = datalen - transhdrlen - fraggap;
+
if (copy < 0) {
err = -EINVAL;
kfree_skb(skb);
@@ -1448,6 +1453,7 @@ alloc_new_skb:
length -= datalen - fraggap;
transhdrlen = 0;
exthdrlen = 0;
+ dst_exthdrlen = 0;
csummode = CHECKSUM_NONE;
/*
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 3486f62..6f7824e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -542,8 +542,7 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
goto out;
offset = rp->offset;
- total_len = inet_sk(sk)->cork.base.length - (skb_network_header(skb) -
- skb->data);
+ total_len = inet_sk(sk)->cork.base.length;
if (offset >= total_len - 1) {
err = -EINVAL;
ip6_flush_pending_frames(sk);
--
1.7.0.4
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/2] xfrm6: Don't call icmpv6_send on local error
2011-10-11 11:42 [PATCH net-next 0/2] ipv6: IPsec pmtu/fragmentation fixes Steffen Klassert
2011-10-11 11:43 ` [PATCH 1/2] ipv6: Fix IPsec slowpath fragmentation problem Steffen Klassert
@ 2011-10-11 11:44 ` Steffen Klassert
2011-10-19 3:53 ` David Miller
1 sibling, 1 reply; 5+ messages in thread
From: Steffen Klassert @ 2011-10-11 11:44 UTC (permalink / raw)
To: David Miller; +Cc: Herbert Xu, netdev
Calling icmpv6_send() on a local message size error leads to
an incorrect update of the path mtu. So use xfrm6_local_rxpmtu()
to notify about the pmtu if the IPV6_DONTFRAG socket option is
set on an udp or raw socket, according RFC 3542 and use
ipv6_local_error() otherwise.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv6/xfrm6_output.c | 56 +++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 54 insertions(+), 2 deletions(-)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 49a91c5..faae417 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -28,6 +28,43 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
EXPORT_SYMBOL(xfrm6_find_1stfragopt);
+static int xfrm6_local_dontfrag(struct sk_buff *skb)
+{
+ int proto;
+ struct sock *sk = skb->sk;
+
+ if (sk) {
+ proto = sk->sk_protocol;
+
+ if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
+ return inet6_sk(sk)->dontfrag;
+ }
+
+ return 0;
+}
+
+static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
+{
+ struct flowi6 fl6;
+ struct sock *sk = skb->sk;
+
+ fl6.flowi6_oif = sk->sk_bound_dev_if;
+ ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->daddr);
+
+ ipv6_local_rxpmtu(sk, &fl6, mtu);
+}
+
+static void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
+{
+ struct flowi6 fl6;
+ struct sock *sk = skb->sk;
+
+ fl6.fl6_dport = inet_sk(sk)->inet_dport;
+ ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->daddr);
+
+ ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
+}
+
static int xfrm6_tunnel_check_size(struct sk_buff *skb)
{
int mtu, ret = 0;
@@ -39,7 +76,13 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
if (!skb->local_df && skb->len > mtu) {
skb->dev = dst->dev;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+
+ if (xfrm6_local_dontfrag(skb))
+ xfrm6_local_rxpmtu(skb, mtu);
+ else if (skb->sk)
+ xfrm6_local_error(skb, mtu);
+ else
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
ret = -EMSGSIZE;
}
@@ -93,9 +136,18 @@ static int __xfrm6_output(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
+ int mtu = ip6_skb_dst_mtu(skb);
+
+ if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
+ xfrm6_local_rxpmtu(skb, mtu);
+ return -EMSGSIZE;
+ } else if (!skb->local_df && skb->len > mtu && skb->sk) {
+ xfrm6_local_error(skb, mtu);
+ return -EMSGSIZE;
+ }
if ((x && x->props.mode == XFRM_MODE_TUNNEL) &&
- ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+ ((skb->len > mtu && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)))) {
return ip6_fragment(skb, x->outer_mode->afinfo->output_finish);
}
--
1.7.0.4
^ permalink raw reply related [flat|nested] 5+ messages in thread