netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Timo Teräs" <timo.teras@iki.fi>
To: netdev@vger.kernel.org
Cc: "Timo Teräs" <timo.teras@iki.fi>,
	"Steffen Klassert" <steffen.klassert@secunet.com>
Subject: [PATCH net-next 3/6] ipv4: properly refresh rtable entries on pmtu/redirect events
Date: Mon, 27 May 2013 14:16:13 +0300	[thread overview]
Message-ID: <1369653376-4731-4-git-send-email-timo.teras@iki.fi> (raw)
In-Reply-To: <1369653376-4731-1-git-send-email-timo.teras@iki.fi>

This reverts commit 05ab86c5 (xfrm4: Invalidate all ipv4 routes on
IPsec pmtu events). Flushing all cached entries is not needed.

Instead, invalidate only the related next hop dsts to recheck for
the added next hop exception where needed. This also fixes a subtle
race due to bumping generation id's before updating the pmtu.

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
---
 net/ipv4/ah4.c    |  7 ++-----
 net/ipv4/esp4.c   |  7 ++-----
 net/ipv4/ipcomp.c |  7 ++-----
 net/ipv4/route.c  | 63 ++++++++++++++++++++++++++++++++-----------------------
 4 files changed, 43 insertions(+), 41 deletions(-)

diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 2e7f194..7179026 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -419,12 +419,9 @@ static void ah4_err(struct sk_buff *skb, u32 info)
 	if (!x)
 		return;
 
-	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
-		atomic_inc(&flow_cache_genid);
-		rt_genid_bump(net);
-
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
 		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0);
-	} else
+	else
 		ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0);
 	xfrm_state_put(x);
 }
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4cfe34d..ab3d814 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -502,12 +502,9 @@ static void esp4_err(struct sk_buff *skb, u32 info)
 	if (!x)
 		return;
 
-	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
-		atomic_inc(&flow_cache_genid);
-		rt_genid_bump(net);
-
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
 		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0);
-	} else
+	else
 		ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0);
 	xfrm_state_put(x);
 }
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 59cb8c7..826be4c 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -47,12 +47,9 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
 	if (!x)
 		return;
 
-	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) {
-		atomic_inc(&flow_cache_genid);
-		rt_genid_bump(net);
-
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
 		ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0);
-	} else
+	else
 		ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0);
 	xfrm_state_put(x);
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 550781a..561a378 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -594,11 +594,25 @@ static inline u32 fnhe_hashfun(__be32 daddr)
 	return hval & (FNHE_HASH_SIZE - 1);
 }
 
+static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
+{
+	rt->rt_pmtu = fnhe->fnhe_pmtu;
+	rt->dst.expires = fnhe->fnhe_expires;
+
+	if (fnhe->fnhe_gw) {
+		rt->rt_flags |= RTCF_REDIRECTED;
+		rt->rt_gateway = fnhe->fnhe_gw;
+		rt->rt_uses_gateway = 1;
+	}
+}
+
 static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 				  u32 pmtu, unsigned long expires)
 {
 	struct fnhe_hash_bucket *hash;
 	struct fib_nh_exception *fnhe;
+	struct rtable *rt;
+	unsigned int i;
 	int depth;
 	u32 hval = fnhe_hashfun(daddr);
 
@@ -627,8 +641,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 			fnhe->fnhe_gw = gw;
 		if (pmtu) {
 			fnhe->fnhe_pmtu = pmtu;
-			fnhe->fnhe_expires = expires;
+			fnhe->fnhe_expires = max(1UL, expires);
 		}
+		/* Update all cached dsts too */
+		rt = rcu_dereference(fnhe->fnhe_rth);
+		if (rt)
+			fill_route_from_fnhe(rt, fnhe);
 	} else {
 		if (depth > FNHE_RECLAIM_DEPTH)
 			fnhe = fnhe_oldest(hash);
@@ -644,6 +662,18 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
 		fnhe->fnhe_gw = gw;
 		fnhe->fnhe_pmtu = pmtu;
 		fnhe->fnhe_expires = expires;
+
+		/* Exception created; mark the cached routes for the nexthop
+		 * stale, so anyone caching it rechecks if this exception
+		 * applies to them.
+		 */
+		for_each_possible_cpu(i) {
+			struct rtable __rcu **prt;
+			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
+			rt = rcu_dereference(*prt);
+			if (rt)
+				rt->dst.obsolete = DST_OBSOLETE_KILL;
+		}
 	}
 
 	fnhe->fnhe_stamp = jiffies;
@@ -917,13 +947,6 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 	if (mtu < ip_rt_min_pmtu)
 		mtu = ip_rt_min_pmtu;
 
-	if (!rt->rt_pmtu) {
-		dst->obsolete = DST_OBSOLETE_KILL;
-	} else {
-		rt->rt_pmtu = mtu;
-		dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
-	}
-
 	rcu_read_lock();
 	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
 		struct fib_nh *nh = &FIB_RES_NH(res);
@@ -1063,11 +1086,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
 	 * into this function always.
 	 *
-	 * When a PMTU/redirect information update invalidates a
-	 * route, this is indicated by setting obsolete to
-	 * DST_OBSOLETE_KILL.
+	 * When a PMTU/redirect information update invalidates a route,
+	 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
+	 * DST_OBSOLETE_DEAD by dst_free().
 	 */
-	if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
+	if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
 		return NULL;
 	return dst;
 }
@@ -1215,20 +1238,8 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
 			fnhe->fnhe_pmtu = 0;
 			fnhe->fnhe_expires = 0;
 		}
-		if (fnhe->fnhe_pmtu) {
-			unsigned long expires = fnhe->fnhe_expires;
-			unsigned long diff = expires - jiffies;
-
-			if (time_before(jiffies, expires)) {
-				rt->rt_pmtu = fnhe->fnhe_pmtu;
-				dst_set_expires(&rt->dst, diff);
-			}
-		}
-		if (fnhe->fnhe_gw) {
-			rt->rt_flags |= RTCF_REDIRECTED;
-			rt->rt_gateway = fnhe->fnhe_gw;
-			rt->rt_uses_gateway = 1;
-		} else if (!rt->rt_gateway)
+		fill_route_from_fnhe(rt, fnhe);
+		if (!rt->rt_gateway)
 			rt->rt_gateway = daddr;
 
 		rcu_assign_pointer(fnhe->fnhe_rth, rt);
-- 
1.8.2.3

  parent reply	other threads:[~2013-05-27 11:14 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-05-27 11:16 [PATCH net-next 0/6] ipv4 fixes for dmvpn Timo Teräs
2013-05-27 11:16 ` [PATCH net-next 1/6] net: inform NETDEV_CHANGE callbacks which flags were changed Timo Teräs
2013-05-27 11:18   ` Jiri Pirko
2013-05-27 11:16 ` [PATCH net-next 2/6] arp: flush arp cache on IFF_NOARP change Timo Teräs
2013-05-27 11:16 ` Timo Teräs [this message]
2013-05-27 11:16 ` [PATCH net-next 4/6] ipv4: rate limit updating of next hop exceptions with same pmtu Timo Teräs
2013-05-27 11:16 ` [PATCH net-next 5/6] ipv4: use separate genid for next hop exceptions Timo Teräs
2013-05-27 11:16 ` [PATCH RFC net-next 6/6] ipv4: use next hop exceptions also for input routes Timo Teräs
2013-05-28  6:33 ` [PATCH net-next 0/6] ipv4 fixes for dmvpn Timo Teras
2013-05-28  6:38   ` David Miller
2013-05-28  6:46     ` [PATCH net-next 1/3] ipv4: properly refresh rtable entries on pmtu/redirect events Timo Teräs
2013-05-28  6:46       ` [PATCH net-next 2/3] ipv4: rate limit updating of next hop exceptions with same pmtu Timo Teräs
2013-05-28  8:45         ` Julian Anastasov
2013-05-28 10:07           ` Timo Teras
2013-05-28 21:04             ` Julian Anastasov
2013-05-29  5:07               ` Timo Teras
2013-05-29 22:49                 ` Julian Anastasov
2013-06-03  7:10         ` David Miller
2013-05-28  6:46       ` [PATCH net-next 3/3] ipv4: use separate genid for next hop exceptions Timo Teräs
2013-06-03  7:10         ` David Miller
2013-05-28  8:25       ` [PATCH net-next 1/3] ipv4: properly refresh rtable entries on pmtu/redirect events Julian Anastasov
2013-05-28  8:53         ` Timo Teras
2013-05-28 19:44           ` Julian Anastasov
2013-06-03  7:09       ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1369653376-4731-4-git-send-email-timo.teras@iki.fi \
    --to=timo.teras@iki.fi \
    --cc=netdev@vger.kernel.org \
    --cc=steffen.klassert@secunet.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).