netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Timo Teräs" <timo.teras@iki.fi>
To: netdev@vger.kernel.org
Subject: [PATCH] [IPV4]: Update MTU to all related cache entries in ip_rt_frag_needed()
Date: Thu, 24 Apr 2008 13:05:27 +0300	[thread overview]
Message-ID: <48105B67.7060804@iki.fi> (raw)

Add a struct net_device parameter to ip_rt_frag_needed() and also update
the MTU of cache entries where an ifindex is specified. This is similar
to what is already done in ip_rt_redirect().

Signed-off-by: Timo Teras <timo.teras@iki.fi>
---
I noticed this bug when using a GRE tunnel bound to a specific device.
Binding causes ip_gre to use the ifindex in ip_route_output_key() lookups,
and the results are not correct unless the MTU is also updated for entries
where the ifindex is specified (this caused my tunneled packets to be
dropped).

I have a patch for older kernels too if this is good for -stable.

 include/net/route.h |    2 +-
 net/ipv4/icmp.c     |    3 ++-
 net/ipv4/route.c    |   38 ++++++++++++++++++++++----------------
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index c633880..fc836ff 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -116,7 +116,7 @@ extern int		__ip_route_output_key(struct net *, struct rtable **, const struct f
 extern int		ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
 extern int		ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
 extern int		ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin);
-extern unsigned short	ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu);
+extern unsigned short	ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
 extern void		ip_rt_send_redirect(struct sk_buff *skb);
 
 extern unsigned		inet_addr_type(struct net *net, __be32 addr);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c67d00e..8739735 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -691,7 +691,8 @@ static void icmp_unreach(struct sk_buff *skb)
 					       NIPQUAD(iph->daddr));
 			} else {
 				info = ip_rt_frag_needed(net, iph,
-						     ntohs(icmph->un.frag.mtu));
+							 ntohs(icmph->un.frag.mtu),
+							 skb->dev);
 				if (!info)
 					goto out;
 			}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 780e948..0d2f4da 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1429,11 +1429,13 @@ static inline unsigned short guess_mtu(unsigned short old_mtu)
 }
 
 unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
-				 unsigned short new_mtu)
+				 unsigned short new_mtu,
+				 struct net_device *dev)
 {
-	int i;
+	int i, k;
 	unsigned short old_mtu = ntohs(iph->tot_len);
 	struct rtable *rth;
+	int  ikeys[2] = { dev->ifindex, 0 };
 	__be32  skeys[2] = { iph->saddr, 0, };
 	__be32  daddr = iph->daddr;
 	unsigned short est_mtu = 0;
@@ -1441,22 +1443,26 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 	if (ipv4_config.no_pmtu_disc)
 		return 0;
 
-	for (i = 0; i < 2; i++) {
-		unsigned hash = rt_hash(daddr, skeys[i], 0);
+	for (k = 0; k < 2; k++) {
+		for (i = 0; i < 2; i++) {
+			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
 
-		rcu_read_lock();
-		for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-		     rth = rcu_dereference(rth->u.dst.rt_next)) {
-			if (rth->fl.fl4_dst == daddr &&
-			    rth->fl.fl4_src == skeys[i] &&
-			    rth->rt_dst  == daddr &&
-			    rth->rt_src  == iph->saddr &&
-			    rth->fl.iif == 0 &&
-			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
-			    net_eq(dev_net(rth->u.dst.dev), net) &&
-			    rth->rt_genid == atomic_read(&rt_genid)) {
+			rcu_read_lock();
+			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
+			     rth = rcu_dereference(rth->u.dst.rt_next)) {
 				unsigned short mtu = new_mtu;
 
+				if (rth->fl.fl4_dst != daddr ||
+				    rth->fl.fl4_src != skeys[i] ||
+				    rth->rt_dst != daddr ||
+				    rth->rt_src != iph->saddr ||
+				    rth->fl.oif != ikeys[k] ||
+				    rth->fl.iif != 0 ||
+				    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
+				    !net_eq(dev_net(rth->u.dst.dev), net) ||
+				    rth->rt_genid != atomic_read(&rt_genid))
+					continue;
+
 				if (new_mtu < 68 || new_mtu >= old_mtu) {
 
 					/* BSD 4.2 compatibility hack :-( */
@@ -1482,8 +1488,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 					est_mtu = mtu;
 				}
 			}
+			rcu_read_unlock();
 		}
-		rcu_read_unlock();
 	}
 	return est_mtu ? : new_mtu;
 }
-- 
1.5.2.5


             reply	other threads:[~2008-04-24 10:05 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-24 10:05 Timo Teräs [this message]
2008-04-27  6:27 ` [PATCH] [IPV4]: Update MTU to all related cache entries in ip_rt_frag_needed() David Miller
2008-04-27  8:35   ` Timo Teräs
2008-04-29 10:32     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48105B67.7060804@iki.fi \
    --to=timo.teras@iki.fi \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).