netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/5] route: Use the device mtu as the default for blackhole routes
@ 2011-11-23 12:12 Steffen Klassert
  2011-11-23 12:12 ` [PATCH 2/5] net: Rename the dst_opt default_mtu method to mtu Steffen Klassert
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Steffen Klassert @ 2011-11-23 12:12 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

As it is, we return null as the default mtu of blackhole routes.
This may lead to a propagation of a bogus pmtu if the default_mtu
method of a blackhole route is invoked. So return dst->dev->mtu
as the default mtu instead.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/route.c |    2 +-
 net/ipv6/route.c |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0c74da8..5b17bf1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2757,7 +2757,7 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
 
 static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst)
 {
-	return 0;
+	return dst->dev->mtu;
 }
 
 static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8473016..d8fbd18 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -157,7 +157,7 @@ static struct dst_ops ip6_dst_ops_template = {
 
 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
 {
-	return 0;
+	return dst->dev->mtu;
 }
 
 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/5] net: Rename the dst_opt default_mtu method to mtu
  2011-11-23 12:12 [PATCH 1/5] route: Use the device mtu as the default for blackhole routes Steffen Klassert
@ 2011-11-23 12:12 ` Steffen Klassert
  2011-11-23 12:13 ` [PATCH 3/5] net: Move mtu handling down to the protocol depended handlers Steffen Klassert
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2011-11-23 12:12 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

We plan to invoke the dst_opt->default_mtu() method unconditioally
from dst_mtu(). So rename the method to dst_opt->mtu() to match
the name with the new meaning.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/dst.h      |    2 +-
 include/net/dst_ops.h  |    2 +-
 net/decnet/dn_route.c  |    6 +++---
 net/ipv4/route.c       |   10 +++++-----
 net/ipv6/route.c       |   10 +++++-----
 net/xfrm/xfrm_policy.c |    6 +++---
 6 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 4fb6c43..666de31 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -208,7 +208,7 @@ static inline u32 dst_mtu(const struct dst_entry *dst)
 	u32 mtu = dst_metric_raw(dst, RTAX_MTU);
 
 	if (!mtu)
-		mtu = dst->ops->default_mtu(dst);
+		mtu = dst->ops->mtu(dst);
 
 	return mtu;
 }
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 9adb998..e1c2ee0 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -17,7 +17,7 @@ struct dst_ops {
 	int			(*gc)(struct dst_ops *ops);
 	struct dst_entry *	(*check)(struct dst_entry *, __u32 cookie);
 	unsigned int		(*default_advmss)(const struct dst_entry *);
-	unsigned int		(*default_mtu)(const struct dst_entry *);
+	unsigned int		(*mtu)(const struct dst_entry *);
 	u32 *			(*cow_metrics)(struct dst_entry *, unsigned long);
 	void			(*destroy)(struct dst_entry *);
 	void			(*ifdown)(struct dst_entry *,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index a77d161..db48679 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -112,7 +112,7 @@ static unsigned long dn_rt_deadline;
 static int dn_dst_gc(struct dst_ops *ops);
 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
 static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
-static unsigned int dn_dst_default_mtu(const struct dst_entry *dst);
+static unsigned int dn_dst_mtu(const struct dst_entry *dst);
 static void dn_dst_destroy(struct dst_entry *);
 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
 static void dn_dst_link_failure(struct sk_buff *);
@@ -135,7 +135,7 @@ static struct dst_ops dn_dst_ops = {
 	.gc =			dn_dst_gc,
 	.check =		dn_dst_check,
 	.default_advmss =	dn_dst_default_advmss,
-	.default_mtu =		dn_dst_default_mtu,
+	.mtu =			dn_dst_mtu,
 	.cow_metrics =		dst_cow_metrics_generic,
 	.destroy =		dn_dst_destroy,
 	.negative_advice =	dn_dst_negative_advice,
@@ -825,7 +825,7 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
 	return dn_mss_from_pmtu(dst->dev, dst_mtu(dst));
 }
 
-static unsigned int dn_dst_default_mtu(const struct dst_entry *dst)
+static unsigned int dn_dst_mtu(const struct dst_entry *dst)
 {
 	return dst->dev->mtu;
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5b17bf1..f1ac3ef 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -138,7 +138,7 @@ static int rt_chain_length_max __read_mostly	= 20;
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ipv4_default_advmss(const struct dst_entry *dst);
-static unsigned int	 ipv4_default_mtu(const struct dst_entry *dst);
+static unsigned int	 ipv4_mtu(const struct dst_entry *dst);
 static void		 ipv4_dst_destroy(struct dst_entry *dst);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void		 ipv4_link_failure(struct sk_buff *skb);
@@ -193,7 +193,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.gc =			rt_garbage_collect,
 	.check =		ipv4_dst_check,
 	.default_advmss =	ipv4_default_advmss,
-	.default_mtu =		ipv4_default_mtu,
+	.mtu =			ipv4_mtu,
 	.cow_metrics =		ipv4_cow_metrics,
 	.destroy =		ipv4_dst_destroy,
 	.ifdown =		ipv4_dst_ifdown,
@@ -1814,7 +1814,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 	return advmss;
 }
 
-static unsigned int ipv4_default_mtu(const struct dst_entry *dst)
+static unsigned int ipv4_mtu(const struct dst_entry *dst)
 {
 	unsigned int mtu = dst->dev->mtu;
 
@@ -2755,7 +2755,7 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
 	return NULL;
 }
 
-static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst)
+static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
 {
 	return dst->dev->mtu;
 }
@@ -2775,7 +2775,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.protocol		=	cpu_to_be16(ETH_P_IP),
 	.destroy		=	ipv4_dst_destroy,
 	.check			=	ipv4_blackhole_dst_check,
-	.default_mtu		=	ipv4_blackhole_default_mtu,
+	.mtu			=	ipv4_blackhole_mtu,
 	.default_advmss		=	ipv4_default_advmss,
 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
 	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d8fbd18..76645d7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -77,7 +77,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
 				    const struct in6_addr *dest);
 static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
 static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
-static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
+static unsigned int	 ip6_mtu(const struct dst_entry *dst);
 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 static void		ip6_dst_destroy(struct dst_entry *);
 static void		ip6_dst_ifdown(struct dst_entry *,
@@ -144,7 +144,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.gc_thresh		=	1024,
 	.check			=	ip6_dst_check,
 	.default_advmss		=	ip6_default_advmss,
-	.default_mtu		=	ip6_default_mtu,
+	.mtu			=	ip6_mtu,
 	.cow_metrics		=	ipv6_cow_metrics,
 	.destroy		=	ip6_dst_destroy,
 	.ifdown			=	ip6_dst_ifdown,
@@ -155,7 +155,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.neigh_lookup		=	ip6_neigh_lookup,
 };
 
-static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
+static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 {
 	return dst->dev->mtu;
 }
@@ -175,7 +175,7 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 	.protocol		=	cpu_to_be16(ETH_P_IPV6),
 	.destroy		=	ip6_dst_destroy,
 	.check			=	ip6_dst_check,
-	.default_mtu		=	ip6_blackhole_default_mtu,
+	.mtu			=	ip6_blackhole_mtu,
 	.default_advmss		=	ip6_default_advmss,
 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
@@ -1041,7 +1041,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 	return mtu;
 }
 
-static unsigned int ip6_default_mtu(const struct dst_entry *dst)
+static unsigned int ip6_mtu(const struct dst_entry *dst)
 {
 	unsigned int mtu = IPV6_MIN_MTU;
 	struct inet6_dev *idev;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 552df27..b8be51e 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2382,7 +2382,7 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
 	return dst_metric_advmss(dst->path);
 }
 
-static unsigned int xfrm_default_mtu(const struct dst_entry *dst)
+static unsigned int xfrm_mtu(const struct dst_entry *dst)
 {
 	return dst_mtu(dst->path);
 }
@@ -2411,8 +2411,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 			dst_ops->check = xfrm_dst_check;
 		if (likely(dst_ops->default_advmss == NULL))
 			dst_ops->default_advmss = xfrm_default_advmss;
-		if (likely(dst_ops->default_mtu == NULL))
-			dst_ops->default_mtu = xfrm_default_mtu;
+		if (likely(dst_ops->mtu == NULL))
+			dst_ops->mtu = xfrm_mtu;
 		if (likely(dst_ops->negative_advice == NULL))
 			dst_ops->negative_advice = xfrm_negative_advice;
 		if (likely(dst_ops->link_failure == NULL))
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/5] net: Move mtu handling down to the protocol depended handlers
  2011-11-23 12:12 [PATCH 1/5] route: Use the device mtu as the default for blackhole routes Steffen Klassert
  2011-11-23 12:12 ` [PATCH 2/5] net: Rename the dst_opt default_mtu method to mtu Steffen Klassert
@ 2011-11-23 12:13 ` Steffen Klassert
  2011-11-23 12:14 ` [PATCH 4/5] route: struct rtable can be const in rt_is_input_route and rt_is_output_route Steffen Klassert
  2011-11-23 12:14 ` [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes Steffen Klassert
  3 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2011-11-23 12:13 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

We move all mtu handling from dst_mtu() down to the protocol
layer. So each protocol can implement the mtu handling in
a different manner.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/dst.h      |    7 +------
 net/decnet/dn_route.c  |    4 +++-
 net/ipv4/route.c       |   11 +++++++++--
 net/ipv6/route.c       |   11 +++++++++--
 net/xfrm/xfrm_policy.c |    4 +++-
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/include/net/dst.h b/include/net/dst.h
index 666de31..6faec1a 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -205,12 +205,7 @@ dst_feature(const struct dst_entry *dst, u32 feature)
 
 static inline u32 dst_mtu(const struct dst_entry *dst)
 {
-	u32 mtu = dst_metric_raw(dst, RTAX_MTU);
-
-	if (!mtu)
-		mtu = dst->ops->mtu(dst);
-
-	return mtu;
+	return dst->ops->mtu(dst);
 }
 
 /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index db48679..94f4ec0 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -827,7 +827,9 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
 
 static unsigned int dn_dst_mtu(const struct dst_entry *dst)
 {
-	return dst->dev->mtu;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	return mtu ? : dst->dev->mtu;
 }
 
 static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f1ac3ef..11d1b20 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1816,7 +1816,12 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 
 static unsigned int ipv4_mtu(const struct dst_entry *dst)
 {
-	unsigned int mtu = dst->dev->mtu;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	if (mtu)
+		return mtu;
+
+	mtu = dst->dev->mtu;
 
 	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
 		const struct rtable *rt = (const struct rtable *) dst;
@@ -2757,7 +2762,9 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
 
 static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
 {
-	return dst->dev->mtu;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	return mtu ? : dst->dev->mtu;
 }
 
 static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 76645d7..3399dd3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -157,7 +157,9 @@ static struct dst_ops ip6_dst_ops_template = {
 
 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
 {
-	return dst->dev->mtu;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	return mtu ? : dst->dev->mtu;
 }
 
 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -1043,8 +1045,13 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
 
 static unsigned int ip6_mtu(const struct dst_entry *dst)
 {
-	unsigned int mtu = IPV6_MIN_MTU;
 	struct inet6_dev *idev;
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	if (mtu)
+		return mtu;
+
+	mtu = IPV6_MIN_MTU;
 
 	rcu_read_lock();
 	idev = __in6_dev_get(dst->dev);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b8be51e..2118d64 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2384,7 +2384,9 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
 
 static unsigned int xfrm_mtu(const struct dst_entry *dst)
 {
-	return dst_mtu(dst->path);
+	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+	return mtu ? : dst_mtu(dst->path);
 }
 
 static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr)
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/5] route: struct rtable can be const in rt_is_input_route and rt_is_output_route
  2011-11-23 12:12 [PATCH 1/5] route: Use the device mtu as the default for blackhole routes Steffen Klassert
  2011-11-23 12:12 ` [PATCH 2/5] net: Rename the dst_opt default_mtu method to mtu Steffen Klassert
  2011-11-23 12:13 ` [PATCH 3/5] net: Move mtu handling down to the protocol depended handlers Steffen Klassert
@ 2011-11-23 12:14 ` Steffen Klassert
  2011-11-23 12:14 ` [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes Steffen Klassert
  3 siblings, 0 replies; 12+ messages in thread
From: Steffen Klassert @ 2011-11-23 12:14 UTC (permalink / raw)
  To: David Miller; +Cc: netdev


Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/route.h |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index db7b343..91855d1 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -71,12 +71,12 @@ struct rtable {
 	struct fib_info		*fi; /* for client ref to shared metrics */
 };
 
-static inline bool rt_is_input_route(struct rtable *rt)
+static inline bool rt_is_input_route(const struct rtable *rt)
 {
 	return rt->rt_route_iif != 0;
 }
 
-static inline bool rt_is_output_route(struct rtable *rt)
+static inline bool rt_is_output_route(const struct rtable *rt)
 {
 	return rt->rt_route_iif == 0;
 }
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes
  2011-11-23 12:12 [PATCH 1/5] route: Use the device mtu as the default for blackhole routes Steffen Klassert
                   ` (2 preceding siblings ...)
  2011-11-23 12:14 ` [PATCH 4/5] route: struct rtable can be const in rt_is_input_route and rt_is_output_route Steffen Klassert
@ 2011-11-23 12:14 ` Steffen Klassert
  2011-11-23 15:52   ` Herbert Xu
  3 siblings, 1 reply; 12+ messages in thread
From: Steffen Klassert @ 2011-11-23 12:14 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

The pmtu informations on the inetpeer are visible for output and
input routes. On packet forwarding, we might propagate a learned
pmtu to the sender. As we update the pmtu informations of the
inetpeer on demand, the original sender of the forwarded packets
might never notice when the pmtu to that inetpeer increases.
So use the mtu of the outgoing device on packet forwarding instead
of the pmtu to the final destination.

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/route.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 11d1b20..fb47c8f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1816,15 +1816,15 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
 
 static unsigned int ipv4_mtu(const struct dst_entry *dst)
 {
+	const struct rtable *rt = (const struct rtable *) dst;
 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
 
-	if (mtu)
+	if (mtu && rt_is_output_route(rt))
 		return mtu;
 
 	mtu = dst->dev->mtu;
 
 	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
-		const struct rtable *rt = (const struct rtable *) dst;
 
 		if (rt->rt_gateway != rt->rt_dst && mtu > 576)
 			mtu = 576;
-- 
1.7.0.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes
  2011-11-23 12:14 ` [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes Steffen Klassert
@ 2011-11-23 15:52   ` Herbert Xu
  2011-11-23 21:49     ` David Miller
  0 siblings, 1 reply; 12+ messages in thread
From: Herbert Xu @ 2011-11-23 15:52 UTC (permalink / raw)
  To: Steffen Klassert; +Cc: davem, netdev

Steffen Klassert <steffen.klassert@secunet.com> wrote:
> The pmtu informations on the inetpeer are visible for output and
> input routes. On packet forwarding, we might propagate a learned
> pmtu to the sender. As we update the pmtu informations of the
> inetpeer on demand, the original sender of the forwarded packets
> might never notice when the pmtu to that inetpeer increases.
> So use the mtu of the outgoing device on packet forwarding instead
> of the pmtu to the final destination.

I disagree.  MTU increases are detected by the source retrying,
so they most certainly should notice if our cached value expires.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes
  2011-11-23 15:52   ` Herbert Xu
@ 2011-11-23 21:49     ` David Miller
  2011-11-24  0:10       ` Herbert Xu
  0 siblings, 1 reply; 12+ messages in thread
From: David Miller @ 2011-11-23 21:49 UTC (permalink / raw)
  To: herbert; +Cc: steffen.klassert, netdev

From: Herbert Xu <herbert@gondor.hengli.com.au>
Date: Wed, 23 Nov 2011 23:52:45 +0800

> Steffen Klassert <steffen.klassert@secunet.com> wrote:
>> The pmtu informations on the inetpeer are visible for output and
>> input routes. On packet forwarding, we might propagate a learned
>> pmtu to the sender. As we update the pmtu informations of the
>> inetpeer on demand, the original sender of the forwarded packets
>> might never notice when the pmtu to that inetpeer increases.
>> So use the mtu of the outgoing device on packet forwarding instead
>> of the pmtu to the final destination.
> 
> I disagree.  MTU increases are detected by the source retrying,
> so they most certainly should notice if our cached value expires.

Herbert, consider the issue one level higher :-)

The issue is that "we", the forwarding entity, end up with a cached on
the input route and report this cached PMTU to the sender.

This takes a while to time out.

So even if the sender "probes" he will be kept from seeing new MTU space
becomming available due to our cached value.

What Steffen is doing is simply re-instating the behavior we had
before the inetpeer conversion occurred.  We never used learned PMTU
information on input routes, only output routes stored them.

But now that input and output routes share learned information from
the same set of inetpeer entries, we have to accomodate cases like this
one.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes
  2011-11-23 21:49     ` David Miller
@ 2011-11-24  0:10       ` Herbert Xu
  2011-11-24  0:17         ` David Miller
  0 siblings, 1 reply; 12+ messages in thread
From: Herbert Xu @ 2011-11-24  0:10 UTC (permalink / raw)
  To: David Miller; +Cc: steffen.klassert, netdev

On Wed, Nov 23, 2011 at 04:49:41PM -0500, David Miller wrote:
>
> The issue is that "we", the forwarding entity, end up with a cached on
> the input route and report this cached PMTU to the sender.
> 
> This takes a while to time out.

I understand.  However, this could be seen as an optimisation
as if somehow we as a router discovered the smaller MTU this
would save everybody behind us from having to disocver it by
probing until our discovery expires.

Is there a particular scenario where this breaks? I could only think
of policy routing cases but that's not unique to forwarding as it
also affects us as a host.

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes
  2011-11-24  0:10       ` Herbert Xu
@ 2011-11-24  0:17         ` David Miller
  2011-11-24  1:01           ` Herbert Xu
  0 siblings, 1 reply; 12+ messages in thread
From: David Miller @ 2011-11-24  0:17 UTC (permalink / raw)
  To: herbert; +Cc: steffen.klassert, netdev

From: Herbert Xu <herbert@gondor.hengli.com.au>
Date: Thu, 24 Nov 2011 08:10:35 +0800

> Is there a particular scenario where this breaks? I could only think
> of policy routing cases but that's not unique to forwarding as it
> also affects us as a host.

Steffen has a specific situation.

But regardless I think the current behavior is terrible.

It means there is a propagation delay of PMTU increasing which is on the
order of the number of hops between the sender and the PMTU increased link.
So if the PMTU timeout is X, the propagation time is X * N_HOPS.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes
  2011-11-24  0:17         ` David Miller
@ 2011-11-24  1:01           ` Herbert Xu
  2011-11-24  8:49             ` Steffen Klassert
  0 siblings, 1 reply; 12+ messages in thread
From: Herbert Xu @ 2011-11-24  1:01 UTC (permalink / raw)
  To: David Miller; +Cc: steffen.klassert, netdev

On Wed, Nov 23, 2011 at 07:17:58PM -0500, David Miller wrote:
>
> It means there is a propagation delay of PMTU increasing which is on the
> order of the number of hops between the sender and the PMTU increased link.
> So if the PMTU timeout is X, the propagation time is X * N_HOPS.

That assumes that each router in the path would constantly probe
to refresh its cached value, as otherwise they should all expire
after X.

However, as this was the original behaviour, I think going back to
it is fine.

I am curious in knowing exactly what scenario Steffen ran into
that prompted this change though.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes
  2011-11-24  1:01           ` Herbert Xu
@ 2011-11-24  8:49             ` Steffen Klassert
  2011-11-24 12:06               ` Herbert Xu
  0 siblings, 1 reply; 12+ messages in thread
From: Steffen Klassert @ 2011-11-24  8:49 UTC (permalink / raw)
  To: Herbert Xu; +Cc: David Miller, netdev

On Thu, Nov 24, 2011 at 09:01:47AM +0800, Herbert Xu wrote:
> On Wed, Nov 23, 2011 at 07:17:58PM -0500, David Miller wrote:
> >
> > It means there is a propagation delay of PMTU increasing which is on the
> > order of the number of hops between the sender and the PMTU increased link.
> > So if the PMTU timeout is X, the propagation time is X * N_HOPS.
> 
> That assumes that each router in the path would constantly probe
> to refresh its cached value, as otherwise they should all expire
> after X.

Well, it simply got unpredictable how long pmtu propagation takes.
This depends now on the network activity of the routers and on the
individual configuration of the expiration time on each router. With
the original behaviour, we could expect to notice a pmtu increase
if we probe after our timeout X. Now we don't know when this is going
to happen.

Btw. we leak a trigger to check if the learned pmtu information has
expired in our current tree, the learned pmtu informations never
expire. So we are doing good to not propagate our learned pmtu
to other hosts :)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes
  2011-11-24  8:49             ` Steffen Klassert
@ 2011-11-24 12:06               ` Herbert Xu
  0 siblings, 0 replies; 12+ messages in thread
From: Herbert Xu @ 2011-11-24 12:06 UTC (permalink / raw)
  To: Steffen Klassert; +Cc: David Miller, netdev

On Thu, Nov 24, 2011 at 09:49:52AM +0100, Steffen Klassert wrote:
>
> Well, it simply got unpredictable how long pmtu propagation takes.
> This depends now on the network activity of the routers and on the
> individual configuration of the expiration time on each router. With
> the original behaviour, we could expect to notice a pmtu increase
> if we probe after our timeout X. Now we don't know when this is going
> to happen.

With IPsec tunnel mode this already happens to an extent anyway.
Any PMTU event outside the tunnel is invisible to hosts within it.
Only the router can see those events.

However, as I said I'm not too bothered with this so feel free
to go with your patch.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2011-11-24 12:06 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-11-23 12:12 [PATCH 1/5] route: Use the device mtu as the default for blackhole routes Steffen Klassert
2011-11-23 12:12 ` [PATCH 2/5] net: Rename the dst_opt default_mtu method to mtu Steffen Klassert
2011-11-23 12:13 ` [PATCH 3/5] net: Move mtu handling down to the protocol depended handlers Steffen Klassert
2011-11-23 12:14 ` [PATCH 4/5] route: struct rtable can be const in rt_is_input_route and rt_is_output_route Steffen Klassert
2011-11-23 12:14 ` [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes Steffen Klassert
2011-11-23 15:52   ` Herbert Xu
2011-11-23 21:49     ` David Miller
2011-11-24  0:10       ` Herbert Xu
2011-11-24  0:17         ` David Miller
2011-11-24  1:01           ` Herbert Xu
2011-11-24  8:49             ` Steffen Klassert
2011-11-24 12:06               ` Herbert Xu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).