* [PATCH 1/5] route: Use the device mtu as the default for blackhole routes
@ 2011-11-23 12:12 Steffen Klassert
2011-11-23 12:12 ` [PATCH 2/5] net: Rename the dst_opt default_mtu method to mtu Steffen Klassert
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Steffen Klassert @ 2011-11-23 12:12 UTC (permalink / raw)
To: David Miller; +Cc: netdev
As it is, we return null as the default mtu of blackhole routes.
This may lead to a propagation of a bogus pmtu if the default_mtu
method of a blackhole route is invoked. So return dst->dev->mtu
as the default mtu instead.
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
net/ipv4/route.c | 2 +-
net/ipv6/route.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0c74da8..5b17bf1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2757,7 +2757,7 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo
static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst)
{
- return 0;
+ return dst->dev->mtu;
}
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8473016..d8fbd18 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -157,7 +157,7 @@ static struct dst_ops ip6_dst_ops_template = {
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
- return 0;
+ return dst->dev->mtu;
}
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
--
1.7.0.4
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 2/5] net: Rename the dst_opt default_mtu method to mtu 2011-11-23 12:12 [PATCH 1/5] route: Use the device mtu as the default for blackhole routes Steffen Klassert @ 2011-11-23 12:12 ` Steffen Klassert 2011-11-23 12:13 ` [PATCH 3/5] net: Move mtu handling down to the protocol depended handlers Steffen Klassert ` (2 subsequent siblings) 3 siblings, 0 replies; 12+ messages in thread From: Steffen Klassert @ 2011-11-23 12:12 UTC (permalink / raw) To: David Miller; +Cc: netdev We plan to invoke the dst_opt->default_mtu() method unconditioally from dst_mtu(). So rename the method to dst_opt->mtu() to match the name with the new meaning. Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> --- include/net/dst.h | 2 +- include/net/dst_ops.h | 2 +- net/decnet/dn_route.c | 6 +++--- net/ipv4/route.c | 10 +++++----- net/ipv6/route.c | 10 +++++----- net/xfrm/xfrm_policy.c | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 4fb6c43..666de31 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -208,7 +208,7 @@ static inline u32 dst_mtu(const struct dst_entry *dst) u32 mtu = dst_metric_raw(dst, RTAX_MTU); if (!mtu) - mtu = dst->ops->default_mtu(dst); + mtu = dst->ops->mtu(dst); return mtu; } diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 9adb998..e1c2ee0 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -17,7 +17,7 @@ struct dst_ops { int (*gc)(struct dst_ops *ops); struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); unsigned int (*default_advmss)(const struct dst_entry *); - unsigned int (*default_mtu)(const struct dst_entry *); + unsigned int (*mtu)(const struct dst_entry *); u32 * (*cow_metrics)(struct dst_entry *, unsigned long); void (*destroy)(struct dst_entry *); void (*ifdown)(struct dst_entry *, diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index a77d161..db48679 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -112,7 +112,7 @@ static unsigned long dn_rt_deadline; static int dn_dst_gc(struct dst_ops *ops); static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); -static unsigned int dn_dst_default_mtu(const struct dst_entry *dst); +static unsigned int dn_dst_mtu(const struct dst_entry *dst); static void dn_dst_destroy(struct dst_entry *); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); @@ -135,7 +135,7 @@ static struct dst_ops dn_dst_ops = { .gc = dn_dst_gc, .check = dn_dst_check, .default_advmss = dn_dst_default_advmss, - .default_mtu = dn_dst_default_mtu, + .mtu = dn_dst_mtu, .cow_metrics = dst_cow_metrics_generic, .destroy = dn_dst_destroy, .negative_advice = dn_dst_negative_advice, @@ -825,7 +825,7 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) return dn_mss_from_pmtu(dst->dev, dst_mtu(dst)); } -static unsigned int dn_dst_default_mtu(const struct dst_entry *dst) +static unsigned int dn_dst_mtu(const struct dst_entry *dst) { return dst->dev->mtu; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5b17bf1..f1ac3ef 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -138,7 +138,7 @@ static int rt_chain_length_max __read_mostly = 20; static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); -static unsigned int ipv4_default_mtu(const struct dst_entry *dst); +static unsigned int ipv4_mtu(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); @@ -193,7 +193,7 @@ static struct dst_ops ipv4_dst_ops = { .gc = rt_garbage_collect, .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, - .default_mtu = ipv4_default_mtu, + .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, @@ -1814,7 +1814,7 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) return advmss; } -static unsigned int ipv4_default_mtu(const struct dst_entry *dst) +static unsigned int ipv4_mtu(const struct dst_entry *dst) { unsigned int mtu = dst->dev->mtu; @@ -2755,7 +2755,7 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo return NULL; } -static unsigned int ipv4_blackhole_default_mtu(const struct dst_entry *dst) +static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) { return dst->dev->mtu; } @@ -2775,7 +2775,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, .check = ipv4_blackhole_dst_check, - .default_mtu = ipv4_blackhole_default_mtu, + .mtu = ipv4_blackhole_mtu, .default_advmss = ipv4_default_advmss, .update_pmtu = ipv4_rt_blackhole_update_pmtu, .cow_metrics = ipv4_rt_blackhole_cow_metrics, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d8fbd18..76645d7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -77,7 +77,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); -static unsigned int ip6_default_mtu(const struct dst_entry *dst); +static unsigned int ip6_mtu(const struct dst_entry *dst); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, @@ -144,7 +144,7 @@ static struct dst_ops ip6_dst_ops_template = { .gc_thresh = 1024, .check = ip6_dst_check, .default_advmss = ip6_default_advmss, - .default_mtu = ip6_default_mtu, + .mtu = ip6_mtu, .cow_metrics = ipv6_cow_metrics, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, @@ -155,7 +155,7 @@ static struct dst_ops ip6_dst_ops_template = { .neigh_lookup = ip6_neigh_lookup, }; -static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) +static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) { return dst->dev->mtu; } @@ -175,7 +175,7 @@ static struct dst_ops ip6_dst_blackhole_ops = { .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, - .default_mtu = ip6_blackhole_default_mtu, + .mtu = ip6_blackhole_mtu, .default_advmss = ip6_default_advmss, .update_pmtu = ip6_rt_blackhole_update_pmtu, .cow_metrics = ip6_rt_blackhole_cow_metrics, @@ -1041,7 +1041,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) return mtu; } -static unsigned int ip6_default_mtu(const struct dst_entry *dst) +static unsigned int ip6_mtu(const struct dst_entry *dst) { unsigned int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 552df27..b8be51e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2382,7 +2382,7 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst) return dst_metric_advmss(dst->path); } -static unsigned int xfrm_default_mtu(const struct dst_entry *dst) +static unsigned int xfrm_mtu(const struct dst_entry *dst) { return dst_mtu(dst->path); } @@ -2411,8 +2411,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->check = xfrm_dst_check; if (likely(dst_ops->default_advmss == NULL)) dst_ops->default_advmss = xfrm_default_advmss; - if (likely(dst_ops->default_mtu == NULL)) - dst_ops->default_mtu = xfrm_default_mtu; + if (likely(dst_ops->mtu == NULL)) + dst_ops->mtu = xfrm_mtu; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) -- 1.7.0.4 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 3/5] net: Move mtu handling down to the protocol depended handlers 2011-11-23 12:12 [PATCH 1/5] route: Use the device mtu as the default for blackhole routes Steffen Klassert 2011-11-23 12:12 ` [PATCH 2/5] net: Rename the dst_opt default_mtu method to mtu Steffen Klassert @ 2011-11-23 12:13 ` Steffen Klassert 2011-11-23 12:14 ` [PATCH 4/5] route: struct rtable can be const in rt_is_input_route and rt_is_output_route Steffen Klassert 2011-11-23 12:14 ` [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes Steffen Klassert 3 siblings, 0 replies; 12+ messages in thread From: Steffen Klassert @ 2011-11-23 12:13 UTC (permalink / raw) To: David Miller; +Cc: netdev We move all mtu handling from dst_mtu() down to the protocol layer. So each protocol can implement the mtu handling in a different manner. Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> --- include/net/dst.h | 7 +------ net/decnet/dn_route.c | 4 +++- net/ipv4/route.c | 11 +++++++++-- net/ipv6/route.c | 11 +++++++++-- net/xfrm/xfrm_policy.c | 4 +++- 5 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/net/dst.h b/include/net/dst.h index 666de31..6faec1a 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -205,12 +205,7 @@ dst_feature(const struct dst_entry *dst, u32 feature) static inline u32 dst_mtu(const struct dst_entry *dst) { - u32 mtu = dst_metric_raw(dst, RTAX_MTU); - - if (!mtu) - mtu = dst->ops->mtu(dst); - - return mtu; + return dst->ops->mtu(dst); } /* RTT metrics are stored in milliseconds for user ABI, but used as jiffies */ diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index db48679..94f4ec0 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -827,7 +827,9 @@ static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) static unsigned int dn_dst_mtu(const struct dst_entry *dst) { - return dst->dev->mtu; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; } static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f1ac3ef..11d1b20 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1816,7 +1816,12 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) static unsigned int ipv4_mtu(const struct dst_entry *dst) { - unsigned int mtu = dst->dev->mtu; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + if (mtu) + return mtu; + + mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { const struct rtable *rt = (const struct rtable *) dst; @@ -2757,7 +2762,9 @@ static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 coo static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) { - return dst->dev->mtu; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; } static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 76645d7..3399dd3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -157,7 +157,9 @@ static struct dst_ops ip6_dst_ops_template = { static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) { - return dst->dev->mtu; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -1043,8 +1045,13 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { - unsigned int mtu = IPV6_MIN_MTU; struct inet6_dev *idev; + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + if (mtu) + return mtu; + + mtu = IPV6_MIN_MTU; rcu_read_lock(); idev = __in6_dev_get(dst->dev); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b8be51e..2118d64 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2384,7 +2384,9 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst) static unsigned int xfrm_mtu(const struct dst_entry *dst) { - return dst_mtu(dst->path); + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst_mtu(dst->path); } static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr) -- 1.7.0.4 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 4/5] route: struct rtable can be const in rt_is_input_route and rt_is_output_route 2011-11-23 12:12 [PATCH 1/5] route: Use the device mtu as the default for blackhole routes Steffen Klassert 2011-11-23 12:12 ` [PATCH 2/5] net: Rename the dst_opt default_mtu method to mtu Steffen Klassert 2011-11-23 12:13 ` [PATCH 3/5] net: Move mtu handling down to the protocol depended handlers Steffen Klassert @ 2011-11-23 12:14 ` Steffen Klassert 2011-11-23 12:14 ` [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes Steffen Klassert 3 siblings, 0 replies; 12+ messages in thread From: Steffen Klassert @ 2011-11-23 12:14 UTC (permalink / raw) To: David Miller; +Cc: netdev Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> --- include/net/route.h | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index db7b343..91855d1 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -71,12 +71,12 @@ struct rtable { struct fib_info *fi; /* for client ref to shared metrics */ }; -static inline bool rt_is_input_route(struct rtable *rt) +static inline bool rt_is_input_route(const struct rtable *rt) { return rt->rt_route_iif != 0; } -static inline bool rt_is_output_route(struct rtable *rt) +static inline bool rt_is_output_route(const struct rtable *rt) { return rt->rt_route_iif == 0; } -- 1.7.0.4 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes 2011-11-23 12:12 [PATCH 1/5] route: Use the device mtu as the default for blackhole routes Steffen Klassert ` (2 preceding siblings ...) 2011-11-23 12:14 ` [PATCH 4/5] route: struct rtable can be const in rt_is_input_route and rt_is_output_route Steffen Klassert @ 2011-11-23 12:14 ` Steffen Klassert 2011-11-23 15:52 ` Herbert Xu 3 siblings, 1 reply; 12+ messages in thread From: Steffen Klassert @ 2011-11-23 12:14 UTC (permalink / raw) To: David Miller; +Cc: netdev The pmtu informations on the inetpeer are visible for output and input routes. On packet forwarding, we might propagate a learned pmtu to the sender. As we update the pmtu informations of the inetpeer on demand, the original sender of the forwarded packets might never notice when the pmtu to that inetpeer increases. So use the mtu of the outgoing device on packet forwarding instead of the pmtu to the final destination. Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com> --- net/ipv4/route.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 11d1b20..fb47c8f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1816,15 +1816,15 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) static unsigned int ipv4_mtu(const struct dst_entry *dst) { + const struct rtable *rt = (const struct rtable *) dst; unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - if (mtu) + if (mtu && rt_is_output_route(rt)) return mtu; mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - const struct rtable *rt = (const struct rtable *) dst; if (rt->rt_gateway != rt->rt_dst && mtu > 576) mtu = 576; -- 1.7.0.4 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes 2011-11-23 12:14 ` [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes Steffen Klassert @ 2011-11-23 15:52 ` Herbert Xu 2011-11-23 21:49 ` David Miller 0 siblings, 1 reply; 12+ messages in thread From: Herbert Xu @ 2011-11-23 15:52 UTC (permalink / raw) To: Steffen Klassert; +Cc: davem, netdev Steffen Klassert <steffen.klassert@secunet.com> wrote: > The pmtu informations on the inetpeer are visible for output and > input routes. On packet forwarding, we might propagate a learned > pmtu to the sender. As we update the pmtu informations of the > inetpeer on demand, the original sender of the forwarded packets > might never notice when the pmtu to that inetpeer increases. > So use the mtu of the outgoing device on packet forwarding instead > of the pmtu to the final destination. I disagree. MTU increases are detected by the source retrying, so they most certainly should notice if our cached value expires. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes 2011-11-23 15:52 ` Herbert Xu @ 2011-11-23 21:49 ` David Miller 2011-11-24 0:10 ` Herbert Xu 0 siblings, 1 reply; 12+ messages in thread From: David Miller @ 2011-11-23 21:49 UTC (permalink / raw) To: herbert; +Cc: steffen.klassert, netdev From: Herbert Xu <herbert@gondor.hengli.com.au> Date: Wed, 23 Nov 2011 23:52:45 +0800 > Steffen Klassert <steffen.klassert@secunet.com> wrote: >> The pmtu informations on the inetpeer are visible for output and >> input routes. On packet forwarding, we might propagate a learned >> pmtu to the sender. As we update the pmtu informations of the >> inetpeer on demand, the original sender of the forwarded packets >> might never notice when the pmtu to that inetpeer increases. >> So use the mtu of the outgoing device on packet forwarding instead >> of the pmtu to the final destination. > > I disagree. MTU increases are detected by the source retrying, > so they most certainly should notice if our cached value expires. Herbert, consider the issue one level higher :-) The issue is that "we", the forwarding entity, end up with a cached on the input route and report this cached PMTU to the sender. This takes a while to time out. So even if the sender "probes" he will be kept from seeing new MTU space becomming available due to our cached value. What Steffen is doing is simply re-instating the behavior we had before the inetpeer conversion occurred. We never used learned PMTU information on input routes, only output routes stored them. But now that input and output routes share learned information from the same set of inetpeer entries, we have to accomodate cases like this one. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes 2011-11-23 21:49 ` David Miller @ 2011-11-24 0:10 ` Herbert Xu 2011-11-24 0:17 ` David Miller 0 siblings, 1 reply; 12+ messages in thread From: Herbert Xu @ 2011-11-24 0:10 UTC (permalink / raw) To: David Miller; +Cc: steffen.klassert, netdev On Wed, Nov 23, 2011 at 04:49:41PM -0500, David Miller wrote: > > The issue is that "we", the forwarding entity, end up with a cached on > the input route and report this cached PMTU to the sender. > > This takes a while to time out. I understand. However, this could be seen as an optimisation as if somehow we as a router discovered the smaller MTU this would save everybody behind us from having to disocver it by probing until our discovery expires. Is there a particular scenario where this breaks? I could only think of policy routing cases but that's not unique to forwarding as it also affects us as a host. Thanks, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes 2011-11-24 0:10 ` Herbert Xu @ 2011-11-24 0:17 ` David Miller 2011-11-24 1:01 ` Herbert Xu 0 siblings, 1 reply; 12+ messages in thread From: David Miller @ 2011-11-24 0:17 UTC (permalink / raw) To: herbert; +Cc: steffen.klassert, netdev From: Herbert Xu <herbert@gondor.hengli.com.au> Date: Thu, 24 Nov 2011 08:10:35 +0800 > Is there a particular scenario where this breaks? I could only think > of policy routing cases but that's not unique to forwarding as it > also affects us as a host. Steffen has a specific situation. But regardless I think the current behavior is terrible. It means there is a propagation delay of PMTU increasing which is on the order of the number of hops between the sender and the PMTU increased link. So if the PMTU timeout is X, the propagation time is X * N_HOPS. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes 2011-11-24 0:17 ` David Miller @ 2011-11-24 1:01 ` Herbert Xu 2011-11-24 8:49 ` Steffen Klassert 0 siblings, 1 reply; 12+ messages in thread From: Herbert Xu @ 2011-11-24 1:01 UTC (permalink / raw) To: David Miller; +Cc: steffen.klassert, netdev On Wed, Nov 23, 2011 at 07:17:58PM -0500, David Miller wrote: > > It means there is a propagation delay of PMTU increasing which is on the > order of the number of hops between the sender and the PMTU increased link. > So if the PMTU timeout is X, the propagation time is X * N_HOPS. That assumes that each router in the path would constantly probe to refresh its cached value, as otherwise they should all expire after X. However, as this was the original behaviour, I think going back to it is fine. I am curious in knowing exactly what scenario Steffen ran into that prompted this change though. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes 2011-11-24 1:01 ` Herbert Xu @ 2011-11-24 8:49 ` Steffen Klassert 2011-11-24 12:06 ` Herbert Xu 0 siblings, 1 reply; 12+ messages in thread From: Steffen Klassert @ 2011-11-24 8:49 UTC (permalink / raw) To: Herbert Xu; +Cc: David Miller, netdev On Thu, Nov 24, 2011 at 09:01:47AM +0800, Herbert Xu wrote: > On Wed, Nov 23, 2011 at 07:17:58PM -0500, David Miller wrote: > > > > It means there is a propagation delay of PMTU increasing which is on the > > order of the number of hops between the sender and the PMTU increased link. > > So if the PMTU timeout is X, the propagation time is X * N_HOPS. > > That assumes that each router in the path would constantly probe > to refresh its cached value, as otherwise they should all expire > after X. Well, it simply got unpredictable how long pmtu propagation takes. This depends now on the network activity of the routers and on the individual configuration of the expiration time on each router. With the original behaviour, we could expect to notice a pmtu increase if we probe after our timeout X. Now we don't know when this is going to happen. Btw. we leak a trigger to check if the learned pmtu information has expired in our current tree, the learned pmtu informations never expire. So we are doing good to not propagate our learned pmtu to other hosts :) ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes 2011-11-24 8:49 ` Steffen Klassert @ 2011-11-24 12:06 ` Herbert Xu 0 siblings, 0 replies; 12+ messages in thread From: Herbert Xu @ 2011-11-24 12:06 UTC (permalink / raw) To: Steffen Klassert; +Cc: David Miller, netdev On Thu, Nov 24, 2011 at 09:49:52AM +0100, Steffen Klassert wrote: > > Well, it simply got unpredictable how long pmtu propagation takes. > This depends now on the network activity of the routers and on the > individual configuration of the expiration time on each router. With > the original behaviour, we could expect to notice a pmtu increase > if we probe after our timeout X. Now we don't know when this is going > to happen. With IPsec tunnel mode this already happens to an extent anyway. Any PMTU event outside the tunnel is invisible to hosts within it. Only the router can see those events. However, as I said I'm not too bothered with this so feel free to go with your patch. Cheers, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2011-11-24 12:06 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2011-11-23 12:12 [PATCH 1/5] route: Use the device mtu as the default for blackhole routes Steffen Klassert 2011-11-23 12:12 ` [PATCH 2/5] net: Rename the dst_opt default_mtu method to mtu Steffen Klassert 2011-11-23 12:13 ` [PATCH 3/5] net: Move mtu handling down to the protocol depended handlers Steffen Klassert 2011-11-23 12:14 ` [PATCH 4/5] route: struct rtable can be const in rt_is_input_route and rt_is_output_route Steffen Klassert 2011-11-23 12:14 ` [PATCH 5/5] ipv4: Don't use the cached pmtu informations for input routes Steffen Klassert 2011-11-23 15:52 ` Herbert Xu 2011-11-23 21:49 ` David Miller 2011-11-24 0:10 ` Herbert Xu 2011-11-24 0:17 ` David Miller 2011-11-24 1:01 ` Herbert Xu 2011-11-24 8:49 ` Steffen Klassert 2011-11-24 12:06 ` Herbert Xu
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).