All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Timo Teräs" <timo.teras@iki.fi>
To: netdev@vger.kernel.org
Subject: [PATCH] [IPV4]: Update MTU to all related cache entries in ip_rt_frag_needed()
Date: Thu, 24 Apr 2008 13:05:27 +0300	[thread overview]
Message-ID: <48105B67.7060804@iki.fi> (raw)

Add struct net_device parameter to ip_rt_frag_needed() and update MTU to
cache entries where ifindex is specified. This is similar to what is
already done in ip_rt_redirect().

Signed-off-by: Timo Teras <timo.teras@iki.fi>
---
I noticed this bug when using GRE tunnel bound to specific device. This
causes ip_gre to use ifindex in ip_route_output_key() lookups and the
results are not proper unless the MTU is updated for entries where
ifindex is specified (causing my tunneled packets to be dropped).

I have a patch for older kernels too if this is good for -stable.

 include/net/route.h |    2 +-
 net/ipv4/icmp.c     |    3 ++-
 net/ipv4/route.c    |   38 ++++++++++++++++++++++----------------
 3 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index c633880..fc836ff 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -116,7 +116,7 @@ extern int		__ip_route_output_key(struct net *, struct rtable **, const struct f
 extern int		ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
 extern int		ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
 extern int		ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin);
-extern unsigned short	ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu);
+extern unsigned short	ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
 extern void		ip_rt_send_redirect(struct sk_buff *skb);
 
 extern unsigned		inet_addr_type(struct net *net, __be32 addr);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index c67d00e..8739735 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -691,7 +691,8 @@ static void icmp_unreach(struct sk_buff *skb)
 					       NIPQUAD(iph->daddr));
 			} else {
 				info = ip_rt_frag_needed(net, iph,
-						     ntohs(icmph->un.frag.mtu));
+							 ntohs(icmph->un.frag.mtu),
+							 skb->dev);
 				if (!info)
 					goto out;
 			}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 780e948..0d2f4da 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1429,11 +1429,13 @@ static inline unsigned short guess_mtu(unsigned short old_mtu)
 }
 
 unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
-				 unsigned short new_mtu)
+				 unsigned short new_mtu,
+				 struct net_device *dev)
 {
-	int i;
+	int i, k;
 	unsigned short old_mtu = ntohs(iph->tot_len);
 	struct rtable *rth;
+	int  ikeys[2] = { dev->ifindex, 0 };
 	__be32  skeys[2] = { iph->saddr, 0, };
 	__be32  daddr = iph->daddr;
 	unsigned short est_mtu = 0;
@@ -1441,22 +1443,26 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 	if (ipv4_config.no_pmtu_disc)
 		return 0;
 
-	for (i = 0; i < 2; i++) {
-		unsigned hash = rt_hash(daddr, skeys[i], 0);
+	for (k = 0; k < 2; k++) {
+		for (i = 0; i < 2; i++) {
+			unsigned hash = rt_hash(daddr, skeys[i], ikeys[k]);
 
-		rcu_read_lock();
-		for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-		     rth = rcu_dereference(rth->u.dst.rt_next)) {
-			if (rth->fl.fl4_dst == daddr &&
-			    rth->fl.fl4_src == skeys[i] &&
-			    rth->rt_dst  == daddr &&
-			    rth->rt_src  == iph->saddr &&
-			    rth->fl.iif == 0 &&
-			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU)) &&
-			    net_eq(dev_net(rth->u.dst.dev), net) &&
-			    rth->rt_genid == atomic_read(&rt_genid)) {
+			rcu_read_lock();
+			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
+			     rth = rcu_dereference(rth->u.dst.rt_next)) {
 				unsigned short mtu = new_mtu;
 
+				if (rth->fl.fl4_dst != daddr ||
+				    rth->fl.fl4_src != skeys[i] ||
+				    rth->rt_dst != daddr ||
+				    rth->rt_src != iph->saddr ||
+				    rth->fl.oif != ikeys[k] ||
+				    rth->fl.iif != 0 ||
+				    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
+				    !net_eq(dev_net(rth->u.dst.dev), net) ||
+				    rth->rt_genid != atomic_read(&rt_genid))
+					continue;
+
 				if (new_mtu < 68 || new_mtu >= old_mtu) {
 
 					/* BSD 4.2 compatibility hack :-( */
@@ -1482,8 +1488,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 					est_mtu = mtu;
 				}
 			}
+			rcu_read_unlock();
 		}
-		rcu_read_unlock();
 	}
 	return est_mtu ? : new_mtu;
 }
-- 
1.5.2.5


             reply	other threads:[~2008-04-24 10:05 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-24 10:05 Timo Teräs [this message]
2008-04-27  6:27 ` [PATCH] [IPV4]: Update MTU to all related cache entries in ip_rt_frag_needed() David Miller
2008-04-27  8:35   ` Timo Teräs
2008-04-29 10:32     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48105B67.7060804@iki.fi \
    --to=timo.teras@iki.fi \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.