netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Florian Westphal <fw@strlen.de>
To: <netdev@vger.kernel.org>
Cc: hannes@stressinduktion.org, Florian Westphal <fw@strlen.de>,
	Eric Dumazet <edumazet@google.com>
Subject: [PATCH -next] net: preserve geometry of fragment sizes when forwarding
Date: Thu,  7 May 2015 23:04:24 +0200	[thread overview]
Message-ID: <1431032664-6478-1-git-send-email-fw@strlen.de> (raw)

There was interest in keeping the geometry of the original fragments when
forwarding.

This (re)enables this feature.

On a router with MTU 1500 on all interfaces and netfilter conntrack enabled:

incoming packet on router:
IP (ttl 64, offset 0, flags [+], ICMP, length 1276) 192.168.7.1 > 10.0.0.2: ICMP echo request, length 1256
IP (ttl 64, offset 1256, flags [+], ICMP, length 1276) 192.168.7.1 > 10.0.0.2: ip-proto-1
IP (ttl 64, offset 2512, flags [none], ICMP, length 516) 192.168.7.1 > 10.0.0.2: ip-proto-1

Without the patch, refragmentation uses the device MTU. Incoming packet on the destination host:
IP (ttl 63, offset 0, flags [+], ICMP, length 1500) 192.168.7.1 > 10.0.0.2: ICMP echo request, length 1480
IP (ttl 63, offset 1480, flags [+], ICMP, length 1500) 192.168.7.1 > 10.0.0.2: ip-proto-1
IP (ttl 63, offset 2960, flags [none], ICMP, length 68) 192.168.7.1 > 10.0.0.2: ip-proto-1

With the patch, the ip_fragment skb_has_frag_list fastpath gets used:
IP (ttl 63, offset 0, flags [+], ICMP, length 1276) 192.168.7.1 > 10.0.0.2: ICMP echo request, length 1256
IP (ttl 63, offset 1256, flags [+], ICMP, length 1276) 192.168.7.1 > 10.0.0.2: ip-proto-1
IP (ttl 63, offset 2512, flags [none], ICMP, length 516) 192.168.7.1 > 10.0.0.2: ip-proto-1

Caveat:
This disables the optimization made in commit
3cc4949269e01f39443d0 ("ipv4: use skb coalescing in defragmentation") for
everyone as soon as the nf_defrag_ipv4 modules are loaded (conntrack defrag
hooks in earlier than the ipv4 stack's own defragmentation for local
delivery), and there is no easy way to determine at that stage whether the
skb will be forwarded.

ip_fragment checks the size of the frag skbs against the outgoing device MTU
before using them, so if the device MTU is smaller than the frag skb length,
the device MTU will be used for refragmentation instead.

Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
---
 net/ipv4/ip_fragment.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cc1da6d..31fbb18 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -93,7 +93,7 @@ int ip_frag_mem(struct net *net)
 }
 
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
-			 struct net_device *dev);
+			 struct net_device *dev, bool preserve_frags);
 
 struct ip4_create_arg {
 	struct iphdr *iph;
@@ -315,7 +315,8 @@ static int ip_frag_reinit(struct ipq *qp)
 }
 
 /* Add new segment to existing queue. */
-static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb,
+			 bool preserve_frags)
 {
 	struct sk_buff *prev, *next;
 	struct net_device *dev;
@@ -483,7 +484,7 @@ found:
 		unsigned long orefdst = skb->_skb_refdst;
 
 		skb->_skb_refdst = 0UL;
-		err = ip_frag_reasm(qp, prev, dev);
+		err = ip_frag_reasm(qp, prev, dev, preserve_frags);
 		skb->_skb_refdst = orefdst;
 		return err;
 	}
@@ -500,7 +501,7 @@ err:
 /* Build a new IP datagram from all its fragments. */
 
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
-			 struct net_device *dev)
+			 struct net_device *dev, bool preserve_frags)
 {
 	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
 	struct iphdr *iph;
@@ -590,7 +591,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 
-		if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
+		if (!preserve_frags &&
+		    skb_try_coalesce(head, fp, &headstolen, &delta)) {
 			kfree_skb_partial(fp, headstolen);
 		} else {
 			if (!skb_shinfo(head)->frag_list)
@@ -629,6 +631,11 @@ out_fail:
 	return err;
 }
 
+static bool preserve_fraglist(u32 user)
+{
+	return user != IP_DEFRAG_LOCAL_DELIVER;
+}
+
 /* Process an incoming IP datagram fragment. */
 int ip_defrag(struct sk_buff *skb, u32 user)
 {
@@ -645,7 +652,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 
 		spin_lock(&qp->q.lock);
 
-		ret = ip_frag_queue(qp, skb);
+		ret = ip_frag_queue(qp, skb, preserve_fraglist(user));
 
 		spin_unlock(&qp->q.lock);
 		ipq_put(qp);
-- 
2.0.5

             reply	other threads:[~2015-05-07 21:04 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-07 21:04 Florian Westphal [this message]
2015-05-18 19:39 ` [PATCH -next] net: preserve geometry of fragment sizes when forwarding David Miller
2015-05-18 20:06   ` Florian Westphal
2015-05-18 20:28     ` David Miller
2015-05-18 20:40       ` Florian Westphal
2015-05-18 20:55         ` David Miller
2015-05-18 21:33           ` Florian Westphal
2015-05-18 22:50             ` Herbert Xu
2015-05-18 23:02               ` Florian Westphal
2015-05-18 23:20                 ` Herbert Xu
2015-05-18 23:51             ` David Miller
2015-05-19 12:34               ` Florian Westphal
2015-05-19 19:34                 ` Jay Vosburgh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1431032664-6478-1-git-send-email-fw@strlen.de \
    --to=fw@strlen.de \
    --cc=edumazet@google.com \
    --cc=hannes@stressinduktion.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).