* [PATCH] ipv4: remove ip_rt_secret timer
@ 2010-05-06 17:16 Neil Horman
2010-05-06 17:32 ` Eric Dumazet
` (3 more replies)
0 siblings, 4 replies; 17+ messages in thread
From: Neil Horman @ 2010-05-06 17:16 UTC (permalink / raw)
To: netdev; +Cc: davem, kuznet, jmorris, yoshfuji, kaber, nhorman
A while back there was a discussion regarding the rt_secret_interval timer.
Given that we've had the ability to do emergency route cache rebuilds for awhile
now, based on a statistical analysis of the various hash chain lengths in the
cache, the use of the flush timer is somewhat redundant. This patch removes the
rt_secret_interval sysctl, allowing us to rely solely on the statistical
analysis mechanism to determine the need for route cache flushes.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
include/net/netns/ipv4.h | 1
net/ipv4/route.c | 108 -----------------------------------------------
2 files changed, 2 insertions(+), 107 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ae07fee..d68c3f1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -55,7 +55,6 @@ struct netns_ipv4 {
int sysctl_rt_cache_rebuild_count;
int current_rt_cache_rebuild_count;
- struct timer_list rt_secret_timer;
atomic_t rt_genid;
#ifdef CONFIG_IP_MROUTE
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a947428..ffd3da1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
-static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
static int rt_chain_length_max __read_mostly = 20;
static struct delayed_work expires_work;
@@ -918,32 +917,11 @@ void rt_cache_flush_batch(void)
rt_do_flush(!in_softirq());
}
-/*
- * We change rt_genid and let gc do the cleanup
- */
-static void rt_secret_rebuild(unsigned long __net)
-{
- struct net *net = (struct net *)__net;
- rt_cache_invalidate(net);
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
-static void rt_secret_rebuild_oneshot(struct net *net)
-{
- del_timer_sync(&net->ipv4.rt_secret_timer);
- rt_cache_invalidate(net);
- if (ip_rt_secret_interval)
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
static void rt_emergency_hash_rebuild(struct net *net)
{
- if (net_ratelimit()) {
+ if (net_ratelimit())
printk(KERN_WARNING "Route hash chain too long!\n");
- printk(KERN_WARNING "Adjust your secret_interval!\n");
- }
-
- rt_secret_rebuild_oneshot(net);
+ rt_cache_invalidate(net);
}
/*
@@ -3101,48 +3079,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
return -EINVAL;
}
-static void rt_secret_reschedule(int old)
-{
- struct net *net;
- int new = ip_rt_secret_interval;
- int diff = new - old;
-
- if (!diff)
- return;
-
- rtnl_lock();
- for_each_net(net) {
- int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
- long time;
-
- if (!new)
- continue;
-
- if (deleted) {
- time = net->ipv4.rt_secret_timer.expires - jiffies;
-
- if (time <= 0 || (time += diff) <= 0)
- time = 0;
- } else
- time = new;
-
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + time);
- }
- rtnl_unlock();
-}
-
-static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- int old = ip_rt_secret_interval;
- int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
-
- rt_secret_reschedule(old);
-
- return ret;
-}
-
static ctl_table ipv4_route_table[] = {
{
.procname = "gc_thresh",
@@ -3251,13 +3187,6 @@ static ctl_table ipv4_route_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "secret_interval",
- .data = &ip_rt_secret_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = ipv4_sysctl_rt_secret_interval,
- },
{ }
};
@@ -3337,36 +3266,6 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
#endif
-static __net_init int rt_secret_timer_init(struct net *net)
-{
- atomic_set(&net->ipv4.rt_genid,
- (int) ((num_physpages ^ (num_physpages>>8)) ^
- (jiffies ^ (jiffies >> 7))));
-
- net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
- net->ipv4.rt_secret_timer.data = (unsigned long)net;
- init_timer_deferrable(&net->ipv4.rt_secret_timer);
-
- if (ip_rt_secret_interval) {
- net->ipv4.rt_secret_timer.expires =
- jiffies + net_random() % ip_rt_secret_interval +
- ip_rt_secret_interval;
- add_timer(&net->ipv4.rt_secret_timer);
- }
- return 0;
-}
-
-static __net_exit void rt_secret_timer_exit(struct net *net)
-{
- del_timer_sync(&net->ipv4.rt_secret_timer);
-}
-
-static __net_initdata struct pernet_operations rt_secret_timer_ops = {
- .init = rt_secret_timer_init,
- .exit = rt_secret_timer_exit,
-};
-
-
#ifdef CONFIG_NET_CLS_ROUTE
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_NET_CLS_ROUTE */
@@ -3424,9 +3323,6 @@ int __init ip_rt_init(void)
schedule_delayed_work(&expires_work,
net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
- if (register_pernet_subsys(&rt_secret_timer_ops))
- printk(KERN_ERR "Unable to setup rt_secret_timer\n");
-
if (ip_rt_proc_init())
printk(KERN_ERR "Unable to create route proc files\n");
#ifdef CONFIG_XFRM
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer
2010-05-06 17:16 [PATCH] ipv4: remove ip_rt_secret timer Neil Horman
@ 2010-05-06 17:32 ` Eric Dumazet
2010-05-06 18:02 ` Neil Horman
2010-05-06 20:29 ` [PATCH] ipv4: remove ip_rt_secret timer (v2) Neil Horman
` (2 subsequent siblings)
3 siblings, 1 reply; 17+ messages in thread
From: Eric Dumazet @ 2010-05-06 17:32 UTC (permalink / raw)
To: Neil Horman; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
Le jeudi 06 mai 2010 à 13:16 -0400, Neil Horman a écrit :
> A while back there was a discussion regarding the rt_secret_interval timer.
> Given that we've had the ability to do emergency route cache rebuilds for awhile
> now, based on a statistical analysis of the various hash chain lengths in the
> cache, the use of the flush timer is somewhat redundant. This patch removes the
> rt_secret_interval sysctl, allowing us to rely solely on the statistical
> analysis mechanism to determine the need for route cache flushes.
>
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
>
>
Nice cleanup try Neil, but this gives to attackers more time to hit the
cache (infinite time should be enough as a matter of fact ;) )
Hints :
- What is the initial value of rt_genid ?
- How/When is it changed (full 32 bits are changed or small
perturbations ? check rt_cache_invalidate())
Thanks
> include/net/netns/ipv4.h | 1
> net/ipv4/route.c | 108 -----------------------------------------------
> 2 files changed, 2 insertions(+), 107 deletions(-)
>
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index ae07fee..d68c3f1 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -55,7 +55,6 @@ struct netns_ipv4 {
> int sysctl_rt_cache_rebuild_count;
> int current_rt_cache_rebuild_count;
>
> - struct timer_list rt_secret_timer;
> atomic_t rt_genid;
>
> #ifdef CONFIG_IP_MROUTE
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index a947428..ffd3da1 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -129,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8;
> static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
> static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
> static int ip_rt_min_advmss __read_mostly = 256;
> -static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
> static int rt_chain_length_max __read_mostly = 20;
>
> static struct delayed_work expires_work;
> @@ -918,32 +917,11 @@ void rt_cache_flush_batch(void)
> rt_do_flush(!in_softirq());
> }
>
> -/*
> - * We change rt_genid and let gc do the cleanup
> - */
> -static void rt_secret_rebuild(unsigned long __net)
> -{
> - struct net *net = (struct net *)__net;
> - rt_cache_invalidate(net);
> - mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
> -}
> -
> -static void rt_secret_rebuild_oneshot(struct net *net)
> -{
> - del_timer_sync(&net->ipv4.rt_secret_timer);
> - rt_cache_invalidate(net);
> - if (ip_rt_secret_interval)
> - mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
> -}
> -
> static void rt_emergency_hash_rebuild(struct net *net)
> {
> - if (net_ratelimit()) {
> + if (net_ratelimit())
> printk(KERN_WARNING "Route hash chain too long!\n");
> - printk(KERN_WARNING "Adjust your secret_interval!\n");
> - }
> -
> - rt_secret_rebuild_oneshot(net);
> + rt_cache_invalidate(net);
> }
>
> /*
> @@ -3101,48 +3079,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
> return -EINVAL;
> }
>
> -static void rt_secret_reschedule(int old)
> -{
> - struct net *net;
> - int new = ip_rt_secret_interval;
> - int diff = new - old;
> -
> - if (!diff)
> - return;
> -
> - rtnl_lock();
> - for_each_net(net) {
> - int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
> - long time;
> -
> - if (!new)
> - continue;
> -
> - if (deleted) {
> - time = net->ipv4.rt_secret_timer.expires - jiffies;
> -
> - if (time <= 0 || (time += diff) <= 0)
> - time = 0;
> - } else
> - time = new;
> -
> - mod_timer(&net->ipv4.rt_secret_timer, jiffies + time);
> - }
> - rtnl_unlock();
> -}
> -
> -static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
> - void __user *buffer, size_t *lenp,
> - loff_t *ppos)
> -{
> - int old = ip_rt_secret_interval;
> - int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
> -
> - rt_secret_reschedule(old);
> -
> - return ret;
> -}
> -
> static ctl_table ipv4_route_table[] = {
> {
> .procname = "gc_thresh",
> @@ -3251,13 +3187,6 @@ static ctl_table ipv4_route_table[] = {
> .mode = 0644,
> .proc_handler = proc_dointvec,
> },
> - {
> - .procname = "secret_interval",
> - .data = &ip_rt_secret_interval,
> - .maxlen = sizeof(int),
> - .mode = 0644,
> - .proc_handler = ipv4_sysctl_rt_secret_interval,
> - },
> { }
> };
>
> @@ -3337,36 +3266,6 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
> #endif
>
>
> -static __net_init int rt_secret_timer_init(struct net *net)
> -{
> - atomic_set(&net->ipv4.rt_genid,
> - (int) ((num_physpages ^ (num_physpages>>8)) ^
> - (jiffies ^ (jiffies >> 7))));
> -
> - net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
> - net->ipv4.rt_secret_timer.data = (unsigned long)net;
> - init_timer_deferrable(&net->ipv4.rt_secret_timer);
> -
> - if (ip_rt_secret_interval) {
> - net->ipv4.rt_secret_timer.expires =
> - jiffies + net_random() % ip_rt_secret_interval +
> - ip_rt_secret_interval;
> - add_timer(&net->ipv4.rt_secret_timer);
> - }
> - return 0;
> -}
> -
> -static __net_exit void rt_secret_timer_exit(struct net *net)
> -{
> - del_timer_sync(&net->ipv4.rt_secret_timer);
> -}
> -
> -static __net_initdata struct pernet_operations rt_secret_timer_ops = {
> - .init = rt_secret_timer_init,
> - .exit = rt_secret_timer_exit,
> -};
> -
> -
> #ifdef CONFIG_NET_CLS_ROUTE
> struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
> #endif /* CONFIG_NET_CLS_ROUTE */
> @@ -3424,9 +3323,6 @@ int __init ip_rt_init(void)
> schedule_delayed_work(&expires_work,
> net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
>
> - if (register_pernet_subsys(&rt_secret_timer_ops))
> - printk(KERN_ERR "Unable to setup rt_secret_timer\n");
> -
> if (ip_rt_proc_init())
> printk(KERN_ERR "Unable to create route proc files\n");
> #ifdef CONFIG_XFRM
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer
2010-05-06 17:32 ` Eric Dumazet
@ 2010-05-06 18:02 ` Neil Horman
2010-05-06 18:33 ` Eric Dumazet
0 siblings, 1 reply; 17+ messages in thread
From: Neil Horman @ 2010-05-06 18:02 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
On Thu, May 06, 2010 at 07:32:35PM +0200, Eric Dumazet wrote:
> Le jeudi 06 mai 2010 à 13:16 -0400, Neil Horman a écrit :
> > A while back there was a discussion regarding the rt_secret_interval timer.
> > Given that we've had the ability to do emergency route cache rebuilds for awhile
> > now, based on a statistical analysis of the various hash chain lengths in the
> > cache, the use of the flush timer is somewhat redundant. This patch removes the
> > rt_secret_interval sysctl, allowing us to rely solely on the statistical
> > analysis mechanism to determine the need for route cache flushes.
> >
> > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> >
> >
>
> Nice cleanup try Neil, but this gives to attackers more time to hit the
> cache (infinite time should be enough as a matter of fact ;) )
>
Not sure I follow what your complaint is. I get that this gives attackers
plenty of time to try to attack the cache, but thats rather the point of the
statistics gathering for the cache, and why I don't think we need the secret
timer any more. With the statistical analysis we do on the route cache every
gc cycle, we can tell if an attacker has guessed our rt_genid value, and is
making any chains in the cache abnormally long. Thats when we do the rebuild,
modifying the rt_genid, forcing the attacker to re-discover it (which should be
difficult). Theres no need to change this periodically if you're not being
attacked.
> Hints :
>
> - What is the initial value of rt_genid ?
>
> - How/When is it changed (full 32 bits are changed or small
> perturbations ? check rt_cache_invalidate())
>
/*
* Pertubation of rt_genid by a small quantity [1..256]
* Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
* many times (2^24) without giving recent rt_genid.
* Jenkins hash is strong enough that litle changes of rt_genid are OK.
*/
static void rt_cache_invalidate(struct net *net)
{
unsigned char shuffle;
get_random_bytes(&shuffle, sizeof(shuffle));
atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
}
Clearly, its small changes. To paraphrase the comment, Changes to rt_genid are
small enough to be confident that we don't repetatively use a gen_id often, but
sufficiently random that attackers cannot easily guess the next gen_id based on
the current value. Both the timer and the statistics code use this invalidation
technique previously, and the latter continues to do so.
I've not changed anything regarding how we
invalidate, only when we choose to invalidate. Invalidation can lead to
slowdowns during routing, since it send frames through the slow path after an
invalidation. It behooves us to avoid preforming this invalidation without
need, and since we have a mechanism in place to do that invalidation specfically
when we need to, lets get rid of the code that handles that, and make it a bit
cleaner. If there are users that feel strongly that they need to defend against
potential attacks by periodically changing their rt_genid, its still possible.
Its as simple as putting:
echo -1 > /proc/sys/net/ipv4/route/flush
in a cron job.
Regards
Neil
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer
2010-05-06 18:02 ` Neil Horman
@ 2010-05-06 18:33 ` Eric Dumazet
2010-05-06 19:54 ` Neil Horman
0 siblings, 1 reply; 17+ messages in thread
From: Eric Dumazet @ 2010-05-06 18:33 UTC (permalink / raw)
To: Neil Horman; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
Le jeudi 06 mai 2010 à 14:02 -0400, Neil Horman a écrit :
> On Thu, May 06, 2010 at 07:32:35PM +0200, Eric Dumazet wrote:
> > Le jeudi 06 mai 2010 à 13:16 -0400, Neil Horman a écrit :
> > > A while back there was a discussion regarding the rt_secret_interval timer.
> > > Given that we've had the ability to do emergency route cache rebuilds for awhile
> > > now, based on a statistical analysis of the various hash chain lengths in the
> > > cache, the use of the flush timer is somewhat redundant. This patch removes the
> > > rt_secret_interval sysctl, allowing us to rely solely on the statistical
> > > analysis mechanism to determine the need for route cache flushes.
> > >
> > > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > >
> > >
> >
> > Nice cleanup try Neil, but this gives to attackers more time to hit the
> > cache (infinite time should be enough as a matter of fact ;) )
> >
> Not sure I follow what your complaint is. I get that this gives attackers
> plenty of time to try to attack the cache, but thats rather the point of the
> statistics gathering for the cache, and why I don't think we need the secret
> timer any more. With the statistical analysis we do on the route cache every
> gc cycle, we can tell if an attacker has guessed our rt_genid value, and is
> making any chains in the cache abnormally long. Thats when we do the rebuild,
> modifying the rt_genid, forcing the attacker to re-discover it (which should be
> difficult). Theres no need to change this periodically if you're not being
> attacked.
>
> > Hints :
> >
> > - What is the initial value of rt_genid ?
> >
> > - How/When is it changed (full 32 bits are changed or small
> > perturbations ? check rt_cache_invalidate())
> >
> /*
> * Pertubation of rt_genid by a small quantity [1..256]
> * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
> * many times (2^24) without giving recent rt_genid.
> * Jenkins hash is strong enough that litle changes of rt_genid are OK.
> */
> static void rt_cache_invalidate(struct net *net)
> {
> unsigned char shuffle;
>
> get_random_bytes(&shuffle, sizeof(shuffle));
> atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
> }
>
> Clearly, its small changes. To paraphrase the comment, Changes to rt_genid are
> small enough to be confident that we don't repetatively use a gen_id often, but
> sufficiently random that attackers cannot easily guess the next gen_id based on
> the current value. Both the timer and the statistics code use this invalidation
> technique previously, and the latter continues to do so.
>
> I've not changed anything regarding how we
> invalidate, only when we choose to invalidate. Invalidation can lead to
> slowdowns during routing, since it send frames through the slow path after an
> invalidation. It behooves us to avoid preforming this invalidation without
> need, and since we have a mechanism in place to do that invalidation specfically
> when we need to, lets get rid of the code that handles that, and make it a bit
> cleaner. If there are users that feel strongly that they need to defend against
> potential attacks by periodically changing their rt_genid, its still possible.
> Its as simple as putting:
> echo -1 > /proc/sys/net/ipv4/route/flush
> in a cron job.
>
I have some customers that will simply kill me if their routing cache is
disabled by a smart attack, slowing down their server by a 4x factor.
I know its possible, it has been done.
For a quiet machine possible rt_genid values range are known from
attacker, and hash size is also known. Thats really too easy for the bad
guys...
Neil, I think your cleanup should stay a cleanup for the moment, or you
must make sure rt_genid initial value is not 0 (read your patch
again...)
I agree we dont need anymore the complex timer logic. We could keep the
secret_interval (default to 0 if you really want) and force a
rt_cache_invalidate() call once in a while from the periodic
rt_check_expire() for example.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer
2010-05-06 18:33 ` Eric Dumazet
@ 2010-05-06 19:54 ` Neil Horman
2010-05-06 20:10 ` Eric Dumazet
0 siblings, 1 reply; 17+ messages in thread
From: Neil Horman @ 2010-05-06 19:54 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
On Thu, May 06, 2010 at 08:33:33PM +0200, Eric Dumazet wrote:
> Le jeudi 06 mai 2010 à 14:02 -0400, Neil Horman a écrit :
> > On Thu, May 06, 2010 at 07:32:35PM +0200, Eric Dumazet wrote:
> > > Le jeudi 06 mai 2010 à 13:16 -0400, Neil Horman a écrit :
> > > > A while back there was a discussion regarding the rt_secret_interval timer.
> > > > Given that we've had the ability to do emergency route cache rebuilds for awhile
> > > > now, based on a statistical analysis of the various hash chain lengths in the
> > > > cache, the use of the flush timer is somewhat redundant. This patch removes the
> > > > rt_secret_interval sysctl, allowing us to rely solely on the statistical
> > > > analysis mechanism to determine the need for route cache flushes.
> > > >
> > > > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> > > >
> > > >
> > >
> > > Nice cleanup try Neil, but this gives to attackers more time to hit the
> > > cache (infinite time should be enough as a matter of fact ;) )
> > >
> > Not sure I follow what your complaint is. I get that this gives attackers
> > plenty of time to try to attack the cache, but thats rather the point of the
> > statistics gathering for the cache, and why I don't think we need the secret
> > timer any more. With the statistical analysis we do on the route cache every
> > gc cycle, we can tell if an attacker has guessed our rt_genid value, and is
> > making any chains in the cache abnormally long. Thats when we do the rebuild,
> > modifying the rt_genid, forcing the attacker to re-discover it (which should be
> > difficult). Theres no need to change this periodically if you're not being
> > attacked.
> >
> > > Hints :
> > >
> > > - What is the initial value of rt_genid ?
> > >
> > > - How/When is it changed (full 32 bits are changed or small
> > > perturbations ? check rt_cache_invalidate())
> > >
> > /*
> > * Pertubation of rt_genid by a small quantity [1..256]
> > * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
> > * many times (2^24) without giving recent rt_genid.
> > * Jenkins hash is strong enough that litle changes of rt_genid are OK.
> > */
> > static void rt_cache_invalidate(struct net *net)
> > {
> > unsigned char shuffle;
> >
> > get_random_bytes(&shuffle, sizeof(shuffle));
> > atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
> > }
> >
> > Clearly, its small changes. To paraphrase the comment, Changes to rt_genid are
> > small enough to be confident that we don't repetatively use a gen_id often, but
> > sufficiently random that attackers cannot easily guess the next gen_id based on
> > the current value. Both the timer and the statistics code use this invalidation
> > technique previously, and the latter continues to do so.
> >
> > I've not changed anything regarding how we
> > invalidate, only when we choose to invalidate. Invalidation can lead to
> > slowdowns during routing, since it send frames through the slow path after an
> > invalidation. It behooves us to avoid preforming this invalidation without
> > need, and since we have a mechanism in place to do that invalidation specfically
> > when we need to, lets get rid of the code that handles that, and make it a bit
> > cleaner. If there are users that feel strongly that they need to defend against
> > potential attacks by periodically changing their rt_genid, its still possible.
> > Its as simple as putting:
> > echo -1 > /proc/sys/net/ipv4/route/flush
> > in a cron job.
> >
>
> I have some customers that will simply kill me if their routing cache is
> disabled by a smart attack, slowing down their server by a 4x factor.
>
> I know its possible, it has been done.
>
Ok, I can understand that, but I don't think a single user profile needs to
dictate policy here. the statistics code has a configurable threshold for where
to disable caching, so I would think you can just set that to be high. And if
you need even more than that, you can do as I suggested, adding a:
echo -1 > /proc/sys/net/ipv4/route/flush
to a cron job on a short interval. That will preform the _exact_ same operation
that the in-kernel timer did previously. And the other 99% of our users don't
have to suffer a periodic cache invalidation when they don't need it.
> For a quiet machine possible rt_genid values range are known from
> attacker, and hash size is also known. Thats really too easy for the bad
> guys...
>
Well, ok, I can buy your argument, but I think the problem you are describing is
orthogonoal to what my change here does. If its too easy for attackers to guess
our genid secret, then we need to make it more complex to guess, not force
everyone to change it more frequently.
> Neil, I think your cleanup should stay a cleanup for the moment, or you
> must make sure rt_genid initial value is not 0 (read your patch
> again...)
>
Yeah, I get that we start the gen_id at 0, that doesn't come from this change,
its always that way in the net_alloc initalization. I certainly don't have a
problem during inialization starting with a randomization of that value. I'll
post an updated patch shortly.
> I agree we dont need anymore the complex timer logic. We could keep the
> secret_interval (default to 0 if you really want) and force a
> rt_cache_invalidate() call once in a while from the periodic
> rt_check_expire() for example.
>
Doing that doesn't solve my aim however, which is to avoid performing rt_genid
updates when no one is attacking you at all. I completely agree that we can
start the gen_id at some random value (by forcing an initial invalidation),
however. Beyond that however, if someone is managing to guess our secret value,
then we need to make our secret value more complex to determine. Perhaps given
the reduction in the number of times we need to iterate our gen_id with the
timer gone, we can use something more heavyweight to determine the the hash
secret (the cprng perhaps?).
Regards
Neil
>
>
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer
2010-05-06 19:54 ` Neil Horman
@ 2010-05-06 20:10 ` Eric Dumazet
2010-05-06 20:25 ` Neil Horman
0 siblings, 1 reply; 17+ messages in thread
From: Eric Dumazet @ 2010-05-06 20:10 UTC (permalink / raw)
To: Neil Horman; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
> Doing that doesn't solve my aim however, which is to avoid performing rt_genid
> updates when no one is attacking you at all. I completely agree that we can
> start the gen_id at some random value (by forcing an initial invalidation),
> however. Beyond that however, if someone is managing to guess our secret value,
> then we need to make our secret value more complex to determine. Perhaps given
> the reduction in the number of times we need to iterate our gen_id with the
> timer gone, we can use something more heavyweight to determine the the hash
> secret (the cprng perhaps?).
Secrets that dont change are known to be honey pots for hackers.
I just dont see why we want to risk security regressions for something
that proved to work well.
Cache invalidation is just a genid change nowadays, and dont have side
effects.
Considering we do cache invalidation when routes are changed anyway, I
dont get why we should avoid the invalidation once every xxx seconds...
If you believe this cache invalidation has problems, maybe we should
address them and not hide them ?
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer
2010-05-06 20:10 ` Eric Dumazet
@ 2010-05-06 20:25 ` Neil Horman
0 siblings, 0 replies; 17+ messages in thread
From: Neil Horman @ 2010-05-06 20:25 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
On Thu, May 06, 2010 at 10:10:14PM +0200, Eric Dumazet wrote:
>
> > Doing that doesn't solve my aim however, which is to avoid performing rt_genid
> > updates when no one is attacking you at all. I completely agree that we can
> > start the gen_id at some random value (by forcing an initial invalidation),
> > however. Beyond that however, if someone is managing to guess our secret value,
> > then we need to make our secret value more complex to determine. Perhaps given
> > the reduction in the number of times we need to iterate our gen_id with the
> > timer gone, we can use something more heavyweight to determine the the hash
> > secret (the cprng perhaps?).
>
> Secrets that dont change are known to be honey pots for hackers.
>
> I just dont see why we want to risk security regressions for something
> that proved to work well.
>
Because we have two ways of doing the same thing now, and I don't see why we
should maintain code for both. I get that a timer based invalidation works
well. So does the statistical analysis.
> Cache invalidation is just a genid change nowadays, and dont have side
> effects.
>
I disagree with this, changing a genid in and of itself is fast, yes, but it
creates a need for the cache to get repopulated, sending packets through the
slow routing path. On high volume systems this causes a performance
degradation. The timer approach makes that a periodic degradation, one that I
would like to avoid if possible.
I get that hackers like secrets to stay unchanged so that they can figure out
what they are. Its not like we're leaving ourselves vulnerable here, we're just
rebuilding only when we need it, not every X seconds. And if someone is
_really_ in need of a periodic rebuild, and can cope with the performance hit,
then they can still do that from user space, as I've pointed out. We just don't
need to keep the code in the kernel any more.
> Considering we do cache invalidation when routes are changed anyway, I
> dont get why we should avoid the invalidation once every xxx seconds...
>
Who says routes are going to change that often? I know you dont believe that a
former is a substitute for the latter. As for why we should avoid periodic
invalidation, I've said it several times now.
> If you believe this cache invalidation has problems, maybe we should
> address them and not hide them ?
>
Now you're just being intentionally obtuse. Eric, you know perfectly good and
well what my reasons are for wanting to remove the rt_secret timer. Its why we
did the statistical analysis code in the first place. There just not a large
need for it. If you want to do periodic invalidation, fine, do it. Just do it
in user space. We have an on-demand strategy in the kernel that has been
working well for quite some time, and is superior in performance for 99% of the
use cases out there. So lets lighten the maintenence workload for the code
thats not strictly needed anymore by getting rid of it.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer (v2)
2010-05-06 17:16 [PATCH] ipv4: remove ip_rt_secret timer Neil Horman
2010-05-06 17:32 ` Eric Dumazet
@ 2010-05-06 20:29 ` Neil Horman
2010-05-06 21:08 ` Eric Dumazet
2010-05-07 19:55 ` [PATCH] ipv4: remove ip_rt_secret timer (v3) Neil Horman
2010-05-08 1:01 ` [PATCH] ipv4: remove ip_rt_secret timer (v4) Neil Horman
3 siblings, 1 reply; 17+ messages in thread
From: Neil Horman @ 2010-05-06 20:29 UTC (permalink / raw)
To: netdev; +Cc: davem, kuznet, jmorris, yoshfuji, kaber
Version 2 of this patch, taking Erics comment about making the rt_genid non-zero
when a netns is created. This makes sense, and helps prevent attackers from
guessing our initial secret value
A while back there was a discussion regarding the rt_secret_interval timer.
Given that we've had the ability to do emergency route cache rebuilds for awhile
now, based on a statistical analysis of the various hash chain lengths in the
cache, the use of the flush timer is somewhat redundant. This patch removes the
rt_secret_interval sysctl, allowing us to rely solely on the statistical
analysis mechanism to determine the need for route cache flushes.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
include/net/netns/ipv4.h | 1
net/ipv4/route.c | 111 ++++-------------------------------------------
2 files changed, 11 insertions(+), 101 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ae07fee..d68c3f1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -55,7 +55,6 @@ struct netns_ipv4 {
int sysctl_rt_cache_rebuild_count;
int current_rt_cache_rebuild_count;
- struct timer_list rt_secret_timer;
atomic_t rt_genid;
#ifdef CONFIG_IP_MROUTE
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a947428..e55a066 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
-static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
static int rt_chain_length_max __read_mostly = 20;
static struct delayed_work expires_work;
@@ -918,32 +917,11 @@ void rt_cache_flush_batch(void)
rt_do_flush(!in_softirq());
}
-/*
- * We change rt_genid and let gc do the cleanup
- */
-static void rt_secret_rebuild(unsigned long __net)
-{
- struct net *net = (struct net *)__net;
- rt_cache_invalidate(net);
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
-static void rt_secret_rebuild_oneshot(struct net *net)
-{
- del_timer_sync(&net->ipv4.rt_secret_timer);
- rt_cache_invalidate(net);
- if (ip_rt_secret_interval)
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
static void rt_emergency_hash_rebuild(struct net *net)
{
- if (net_ratelimit()) {
+ if (net_ratelimit())
printk(KERN_WARNING "Route hash chain too long!\n");
- printk(KERN_WARNING "Adjust your secret_interval!\n");
- }
-
- rt_secret_rebuild_oneshot(net);
+ rt_cache_invalidate(net);
}
/*
@@ -3101,48 +3079,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
return -EINVAL;
}
-static void rt_secret_reschedule(int old)
-{
- struct net *net;
- int new = ip_rt_secret_interval;
- int diff = new - old;
-
- if (!diff)
- return;
-
- rtnl_lock();
- for_each_net(net) {
- int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
- long time;
-
- if (!new)
- continue;
-
- if (deleted) {
- time = net->ipv4.rt_secret_timer.expires - jiffies;
-
- if (time <= 0 || (time += diff) <= 0)
- time = 0;
- } else
- time = new;
-
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + time);
- }
- rtnl_unlock();
-}
-
-static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- int old = ip_rt_secret_interval;
- int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
-
- rt_secret_reschedule(old);
-
- return ret;
-}
-
static ctl_table ipv4_route_table[] = {
{
.procname = "gc_thresh",
@@ -3251,13 +3187,6 @@ static ctl_table ipv4_route_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "secret_interval",
- .data = &ip_rt_secret_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = ipv4_sysctl_rt_secret_interval,
- },
{ }
};
@@ -3336,34 +3265,18 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
};
#endif
-
-static __net_init int rt_secret_timer_init(struct net *net)
+static __net_init int rt_genid_init(struct net *net)
{
- atomic_set(&net->ipv4.rt_genid,
- (int) ((num_physpages ^ (num_physpages>>8)) ^
- (jiffies ^ (jiffies >> 7))));
-
- net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
- net->ipv4.rt_secret_timer.data = (unsigned long)net;
- init_timer_deferrable(&net->ipv4.rt_secret_timer);
-
- if (ip_rt_secret_interval) {
- net->ipv4.rt_secret_timer.expires =
- jiffies + net_random() % ip_rt_secret_interval +
- ip_rt_secret_interval;
- add_timer(&net->ipv4.rt_secret_timer);
- }
+ /*
+ * This just serves to start off each new net namespace
+ * with a non-zero rt_genid value, making it harder to guess
+ */
+ rt_cache_invalidate(net);
return 0;
}
-static __net_exit void rt_secret_timer_exit(struct net *net)
-{
- del_timer_sync(&net->ipv4.rt_secret_timer);
-}
-
-static __net_initdata struct pernet_operations rt_secret_timer_ops = {
- .init = rt_secret_timer_init,
- .exit = rt_secret_timer_exit,
+static __net_initdata struct pernet_operations rt_genid_ops = {
+ .init = rt_genid_init,
};
@@ -3424,9 +3337,6 @@ int __init ip_rt_init(void)
schedule_delayed_work(&expires_work,
net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
- if (register_pernet_subsys(&rt_secret_timer_ops))
- printk(KERN_ERR "Unable to setup rt_secret_timer\n");
-
if (ip_rt_proc_init())
printk(KERN_ERR "Unable to create route proc files\n");
#ifdef CONFIG_XFRM
@@ -3438,6 +3348,7 @@ int __init ip_rt_init(void)
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&sysctl_route_ops);
#endif
+ register_pernet_subsys(&rt_genid_ops);
return rc;
}
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer (v2)
2010-05-06 20:29 ` [PATCH] ipv4: remove ip_rt_secret timer (v2) Neil Horman
@ 2010-05-06 21:08 ` Eric Dumazet
2010-05-07 0:02 ` nhorman
0 siblings, 1 reply; 17+ messages in thread
From: Eric Dumazet @ 2010-05-06 21:08 UTC (permalink / raw)
To: Neil Horman; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
Le jeudi 06 mai 2010 à 16:29 -0400, Neil Horman a écrit :
> Version 2 of this patch, taking Erics comment about making the rt_genid non-zero
> when a netns is created. This makes sense, and helps prevent attackers from
> guessing our initial secret value
>
>
>
> A while back there was a discussion regarding the rt_secret_interval timer.
> Given that we've had the ability to do emergency route cache rebuilds for awhile
> now, based on a statistical analysis of the various hash chain lengths in the
> cache, the use of the flush timer is somewhat redundant. This patch removes the
> rt_secret_interval sysctl, allowing us to rely solely on the statistical
> analysis mechanism to determine the need for route cache flushes.
>
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
>
> -
> -static __net_init int rt_secret_timer_init(struct net *net)
> +static __net_init int rt_genid_init(struct net *net)
> {
> - atomic_set(&net->ipv4.rt_genid,
> - (int) ((num_physpages ^ (num_physpages>>8)) ^
> - (jiffies ^ (jiffies >> 7))));
> -
> + /*
> + * This just serves to start off each new net namespace
> + * with a non-zero rt_genid value, making it harder to guess
> + */
> + rt_cache_invalidate(net);
> return 0;
> }
>
I am _sorry_ to be such a paranoiac guy.
Could you please feed more than 8 bits here ?
like :
get_random_bytes(&net->ipv4.rt_genid, sizeof(net->ipv4.rt_genid));
There is no need to comment this in the code, this kind of rnd init is
very common in net tree.
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer (v2)
2010-05-06 21:08 ` Eric Dumazet
@ 2010-05-07 0:02 ` nhorman
0 siblings, 0 replies; 17+ messages in thread
From: nhorman @ 2010-05-07 0:02 UTC (permalink / raw)
To: Eric Dumazet, Neil Horman; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
On Thu, 6 May 2010 17:08:05 -0400, Eric Dumazet wrote:
> Le jeudi 06 mai 2010 à 16:29 -0400, Neil Horman a écrit :
> > Version 2 of this patch, taking Erics comment about making the rt_genid non-zero
> > when a netns is created. This makes sense, and helps prevent attackers from
> > guessing our initial secret value
> >
> >
> >
> > A while back there was a discussion regarding the rt_secret_interval timer.
> > Given that we've had the ability to do emergency route cache rebuilds for awhile
> > now, based on a statistical anal> > cache, the use of the flush timer is somewhat redundant. This patch removes the
> > rt_secret_interval sysctl, allowing us to rely solely on the statistical
> > analysis mechanism to determine the need for route cache flushes.
> >
> > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> >
>
> > -
> > -static __net_init int rt_secret_timer_init(struct net *net)
> > +static __net_init int rt_genid_init(struct net *net)
> > {
> > - atomic_set(&net->ipv4.rt_genid,
> > - (int) ((num_physpages ^ (num_physpages>>8)) ^
> > - (jiffies ^ (jiffies >> 7))));
> > -
>
>
> > + /*
> > + * This just serves to start off each new net namespace
> > + * with a non-zero rt_genid value, making it harder to guess
> > + */
> > + rt_cache_invalidate(net);
> > return 0;
> > }
> >
>
> I am _sorry_ to be such a paranoiac guy.
>
Don't be sorry, I think your concern is valid, I just don't want to keep old code around when
> Could you please feed more than 8 bits here ?
>
> like :
>
> get_random_bytes(&net->ipv4.rt_genid, sizeof(net->ipv4.rt_genid));
>
Sure, I'm good with that. I'm not at my desk right now, but ill do that in the morning.
> There is no need to comment this in the code, this kind of rnd init is
> very common in net tree.
>
Ok, copy that, ill fix that up at the same time.
Thanks & regards
Neil
>
>
>
>
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer (v3)
2010-05-06 17:16 [PATCH] ipv4: remove ip_rt_secret timer Neil Horman
2010-05-06 17:32 ` Eric Dumazet
2010-05-06 20:29 ` [PATCH] ipv4: remove ip_rt_secret timer (v2) Neil Horman
@ 2010-05-07 19:55 ` Neil Horman
2010-05-07 21:04 ` Eric Dumazet
2010-05-08 1:01 ` [PATCH] ipv4: remove ip_rt_secret timer (v4) Neil Horman
3 siblings, 1 reply; 17+ messages in thread
From: Neil Horman @ 2010-05-07 19:55 UTC (permalink / raw)
To: netdev; +Cc: davem, kuznet, jmorris, yoshfuji, kaber
Hey-
Sorry for the delay, but I got interested in Erics suggestion of
changing how we update rt_genid, and had a few thoughts. Heres version 3 of
this patch:
Change Notes:
1) Removed the secret_interval binary interface entry in the list (forgot to do
that before)
2) Took Erics Suggestion to change the update for net->ipv4.rt_genid. Now
instead of doing a small incremental change, we simply grab 32 new random bits.
3) The change in (2) got me thinking that part of the reason we used the Jenkins
hash in rt_genid was to ensure non-repetion of id's in a short time period
(which is important, so as not to inadvertently reuse route cache entries that
should be getting expired). While extra randomness makes it harder for
attackers to guess the secret, it makes it possible to return to previously
guessed values as well (if they're lucky). As such, I created a configurable
option, CONFIG_GENID_DUPCHECK. With this option on, the low order 8 bits of the
genid are replaced with a rolling counter, that increments on each new genid.
This creates in effect, a 256 deep list of previously used genid values. In
rt_drop we compare the genids for duplicates. If we find that 2 genid values
have different indexes, but idential remaining bits, they are noted as a repeat
genid, and we call rt_cache_invalidate to generate a new genid and avoid the
duplication problem.
I've done some testing with this patch here, and it seems to work well.
Summary
A while back there was a discussion regarding the rt_secret_interval timer.
Given that we've had the ability to do emergency route cache rebuilds for awhile
now, based on a statistical analysis of the various hash chain lengths in the
cache, the use of the flush timer is somewhat redundant. This patch removes the
rt_secret_interval sysctl, allowing us to rely solely on the statistical
analysis mechanism to determine the need for route cache flushes.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
include/net/netns/ipv4.h | 5 -
kernel/sysctl_binary.c | 1
net/ipv4/Kconfig | 10 ++
net/ipv4/route.c | 164 ++++++++++++++++-------------------------------
4 files changed, 69 insertions(+), 111 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ae07fee..fb53b3c 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -55,9 +55,10 @@ struct netns_ipv4 {
int sysctl_rt_cache_rebuild_count;
int current_rt_cache_rebuild_count;
- struct timer_list rt_secret_timer;
atomic_t rt_genid;
-
+#ifdef CONFIG_GENID_DUPCHECK
+ atomic_t rt_genidx;
+#endif
#ifdef CONFIG_IP_MROUTE
#ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES
struct mr_table *mrt;
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 5903057..937d31d 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -224,7 +224,6 @@ static const struct bin_table bin_net_ipv4_route_table[] = {
{ CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" },
{ CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" },
{ CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" },
- { CTL_INT, NET_IPV4_ROUTE_SECRET_INTERVAL, "secret_interval" },
{}
};
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 8e3a1fd..ad934a9 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -98,6 +98,16 @@ config IP_FIB_TRIE_STATS
Keep track of statistics on structure of FIB TRIE table.
Useful for testing and measuring TRIE performance.
+config GENID_DUPCHECK
+ bool "Duplicate generation id checking in the route cache"
+ ---help---
+ Add checks to the route cache to detect duplicate route cache
+ generation id. Duplicate id's can lead to inappropriate reuse of
+ route caches entries, which can degrate performance. This check
+ adds an index count of 8 bits to the genid which can determine if
+ the same genid is reused within the last 256 iterations. Turn this
+ on
+
config IP_MULTIPLE_TABLES
bool "IP: policy routing"
depends on IP_ADVANCED_ROUTER
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a947428..57e51d4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,6 +112,10 @@
#define RT_FL_TOS(oldflp) \
((u32)(oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK)))
+#ifdef CONFIG_GENID_DUPCHECK
+#define GENID_IDX_MASK 0xffffff00
+#endif
+
#define IP_MAX_MTU 0xFFF0
#define RT_GC_TIMEOUT (300*HZ)
@@ -129,7 +133,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
-static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
static int rt_chain_length_max __read_mostly = 20;
static struct delayed_work expires_work;
@@ -147,7 +150,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
static int rt_garbage_collect(struct dst_ops *ops);
-
+static void rt_cache_invalidate(struct net *net);
static struct dst_ops ipv4_dst_ops = {
.family = AF_INET,
@@ -270,6 +273,34 @@ static inline int rt_genid(struct net *net)
return atomic_read(&net->ipv4.rt_genid);
}
+#ifdef CONFIG_GENID_DUPCHECK
+static inline int rt_genid_dup(int genid1, int genid2)
+{
+ int idx1, idx2;
+ int rid1, rid2;
+
+ idx1 = genid1 & ~GENID_IDX_MASK;
+ idx2 = genid2 & ~GENID_IDX_MASK;
+
+ rid1 = genid1 & GENID_IDX_MASK;
+ rid2 = genid2 & GENID_IDX_MASK;
+
+ /*
+ * Duplicate genids are ids in which
+ * the idx value is different, but the
+ * remainder of the genid is identical
+ */
+ if ((idx1 != idx2) && (rid1 == rid2))
+ return 1;
+ return 0;
+}
+#else
+static inline int rt_genid_dup(genid1, int genid2)
+{
+ return 0;
+}
+#endif
+
#ifdef CONFIG_PROC_FS
struct rt_cache_iter_state {
struct seq_net_private p;
@@ -615,6 +646,10 @@ static inline void rt_free(struct rtable *rt)
static inline void rt_drop(struct rtable *rt)
{
+ if (rt_genid_dup(rt->rt_genid, rt_genid(dev_net(rt->u.dst.dev)))) {
+ printk(KERN_WARNING "Duplicate route genid found!\n");
+ rt_cache_invalidate(dev_net(rt->u.dst.dev));
+ }
ip_rt_put(rt);
call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
}
@@ -888,17 +923,18 @@ static void rt_worker_func(struct work_struct *work)
}
/*
- * Pertubation of rt_genid by a small quantity [1..256]
- * Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
- * many times (2^24) without giving recent rt_genid.
- * Jenkins hash is strong enough that litle changes of rt_genid are OK.
+ * invalidate the cache by making a new rt_genid.
*/
static void rt_cache_invalidate(struct net *net)
{
- unsigned char shuffle;
+ unsigned int new;
- get_random_bytes(&shuffle, sizeof(shuffle));
- atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
+ get_random_bytes(&new, sizeof(new));
+#ifdef CONFIG_GENID_DUPCHECK
+ new &= GENID_IDX_MASK;
+ new |= (atomic_inc_return(&net->ipv4.rt_genidx) & ~GENID_IDX_MASK);
+#endif
+ atomic_set(&net->ipv4.rt_genid, new);
}
/*
@@ -918,32 +954,11 @@ void rt_cache_flush_batch(void)
rt_do_flush(!in_softirq());
}
-/*
- * We change rt_genid and let gc do the cleanup
- */
-static void rt_secret_rebuild(unsigned long __net)
-{
- struct net *net = (struct net *)__net;
- rt_cache_invalidate(net);
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
-static void rt_secret_rebuild_oneshot(struct net *net)
-{
- del_timer_sync(&net->ipv4.rt_secret_timer);
- rt_cache_invalidate(net);
- if (ip_rt_secret_interval)
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
static void rt_emergency_hash_rebuild(struct net *net)
{
- if (net_ratelimit()) {
+ if (net_ratelimit())
printk(KERN_WARNING "Route hash chain too long!\n");
- printk(KERN_WARNING "Adjust your secret_interval!\n");
- }
-
- rt_secret_rebuild_oneshot(net);
+ rt_cache_invalidate(net);
}
/*
@@ -3101,48 +3116,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
return -EINVAL;
}
-static void rt_secret_reschedule(int old)
-{
- struct net *net;
- int new = ip_rt_secret_interval;
- int diff = new - old;
-
- if (!diff)
- return;
-
- rtnl_lock();
- for_each_net(net) {
- int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
- long time;
-
- if (!new)
- continue;
-
- if (deleted) {
- time = net->ipv4.rt_secret_timer.expires - jiffies;
-
- if (time <= 0 || (time += diff) <= 0)
- time = 0;
- } else
- time = new;
-
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + time);
- }
- rtnl_unlock();
-}
-
-static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- int old = ip_rt_secret_interval;
- int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
-
- rt_secret_reschedule(old);
-
- return ret;
-}
-
static ctl_table ipv4_route_table[] = {
{
.procname = "gc_thresh",
@@ -3251,13 +3224,6 @@ static ctl_table ipv4_route_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "secret_interval",
- .data = &ip_rt_secret_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = ipv4_sysctl_rt_secret_interval,
- },
{ }
};
@@ -3336,34 +3302,18 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
};
#endif
-
-static __net_init int rt_secret_timer_init(struct net *net)
+static __net_init int rt_genid_init(struct net *net)
{
- atomic_set(&net->ipv4.rt_genid,
- (int) ((num_physpages ^ (num_physpages>>8)) ^
- (jiffies ^ (jiffies >> 7))));
-
- net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
- net->ipv4.rt_secret_timer.data = (unsigned long)net;
- init_timer_deferrable(&net->ipv4.rt_secret_timer);
-
- if (ip_rt_secret_interval) {
- net->ipv4.rt_secret_timer.expires =
- jiffies + net_random() % ip_rt_secret_interval +
- ip_rt_secret_interval;
- add_timer(&net->ipv4.rt_secret_timer);
- }
+ /*
+ * This just serves to start off each new net namespace
+ * with a non-zero rt_genid value, making it harder to guess
+ */
+ rt_cache_invalidate(net);
return 0;
}
-static __net_exit void rt_secret_timer_exit(struct net *net)
-{
- del_timer_sync(&net->ipv4.rt_secret_timer);
-}
-
-static __net_initdata struct pernet_operations rt_secret_timer_ops = {
- .init = rt_secret_timer_init,
- .exit = rt_secret_timer_exit,
+static __net_initdata struct pernet_operations rt_genid_ops = {
+ .init = rt_genid_init,
};
@@ -3424,9 +3374,6 @@ int __init ip_rt_init(void)
schedule_delayed_work(&expires_work,
net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
- if (register_pernet_subsys(&rt_secret_timer_ops))
- printk(KERN_ERR "Unable to setup rt_secret_timer\n");
-
if (ip_rt_proc_init())
printk(KERN_ERR "Unable to create route proc files\n");
#ifdef CONFIG_XFRM
@@ -3438,6 +3385,7 @@ int __init ip_rt_init(void)
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&sysctl_route_ops);
#endif
+ register_pernet_subsys(&rt_genid_ops);
return rc;
}
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer (v3)
2010-05-07 19:55 ` [PATCH] ipv4: remove ip_rt_secret timer (v3) Neil Horman
@ 2010-05-07 21:04 ` Eric Dumazet
2010-05-07 23:15 ` Neil Horman
0 siblings, 1 reply; 17+ messages in thread
From: Eric Dumazet @ 2010-05-07 21:04 UTC (permalink / raw)
To: Neil Horman; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
Le vendredi 07 mai 2010 à 15:55 -0400, Neil Horman a écrit :
> Hey-
> Sorry for the delay, but I got interested in Erics suggestion of
> changing how we update rt_genid, and had a few thoughts. Heres version 3 of
> this patch:
>
We have time, no hurry Neil.
> Change Notes:
>
> 1) Removed the secret_interval binary interface entry in the list (forgot to do
> that before)
>
> 2) Took Erics Suggestion to change the update for net->ipv4.rt_genid. Now
> instead of doing a small incremental change, we simply grab 32 new random bits.
>
My suggestion was to initialize _once_ at boot time, the _full_ 32bits.
Not to change the perturbations, they are very fine, and need no extra
CONFIG_SOME_MAGICAL_SWITCH.
We have a guarantee that no duplicates are delivered unless you perform
2^24 generations in a short period of time.
But because you want to change full 32bits, you need a complex dupcheck
thing ?
> 3) The change in (2) got me thinking that part of the reason we used the Jenkins
> hash in rt_genid was to ensure non-repetion of id's in a short time period
> (which is important, so as not to inadvertently reuse route cache entries that
> should be getting expired). While extra randomness makes it harder for
> attackers to guess the secret, it makes it possible to return to previously
> guessed values as well (if they're lucky). As such, I created a configurable
> option, CONFIG_GENID_DUPCHECK. With this option on, the low order 8 bits of the
> genid are replaced with a rolling counter, that increments on each new genid.
> This creates in effect, a 256 deep list of previously used genid values. In
> rt_drop we compare the genids for duplicates. If we find that 2 genid values
> have different indexes, but idential remaining bits, they are noted as a repeat
> genid, and we call rt_cache_invalidate to generate a new genid and avoid the
> duplication problem.
>
>
This is not necessary and over engineering if you ask me.
You now rely on probabilistic rules, and depends on get_random_bytes()
be really random, or a new CONFIG setting...
What exact problem do you want to solve Neil ?
Please submit your initial patch, with the small changes :
1) Remove the secret_interval binary interface entry in the list
2) Initialize full 32bits at struct net init time.
Thanks
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer (v3)
2010-05-07 21:04 ` Eric Dumazet
@ 2010-05-07 23:15 ` Neil Horman
0 siblings, 0 replies; 17+ messages in thread
From: Neil Horman @ 2010-05-07 23:15 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
On Fri, May 07, 2010 at 11:04:31PM +0200, Eric Dumazet wrote:
> Le vendredi 07 mai 2010 à 15:55 -0400, Neil Horman a écrit :
> > Hey-
> > Sorry for the delay, but I got interested in Erics suggestion of
> > changing how we update rt_genid, and had a few thoughts. Heres version 3 of
> > this patch:
> >
>
> We have time, no hurry Neil.
>
Yeah, but if I don't keep on top of they slip off my todo list pretty quick :)
> > Change Notes:
> >
> > 1) Removed the secret_interval binary interface entry in the list (forgot to do
> > that before)
> >
> > 2) Took Erics Suggestion to change the update for net->ipv4.rt_genid. Now
> > instead of doing a small incremental change, we simply grab 32 new random bits.
> >
>
> My suggestion was to initialize _once_ at boot time, the _full_ 32bits.
>
Apologies, my read of your statement was that you wanted to randomize the genid
every iteration, not just the first, to avoid the genid n+1 being within 256 of
the last genid.
> Not to change the perturbations, they are very fine, and need no extra
> CONFIG_SOME_MAGICAL_SWITCH.
>
> We have a guarantee that no duplicates are delivered unless you perform
> 2^24 generations in a short period of time.
>
Yes, I mentioned that, thats why I added the index check.
> But because you want to change full 32bits, you need a complex dupcheck
> thing ?
>
We don't _need_ it, thats why I made it configurable.
> > 3) The change in (2) got me thinking that part of the reason we used the Jenkins
> > hash in rt_genid was to ensure non-repetion of id's in a short time period
> > (which is important, so as not to inadvertently reuse route cache entries that
> > should be getting expired). While extra randomness makes it harder for
> > attackers to guess the secret, it makes it possible to return to previously
> > guessed values as well (if they're lucky). As such, I created a configurable
> > option, CONFIG_GENID_DUPCHECK. With this option on, the low order 8 bits of the
> > genid are replaced with a rolling counter, that increments on each new genid.
> > This creates in effect, a 256 deep list of previously used genid values. In
> > rt_drop we compare the genids for duplicates. If we find that 2 genid values
> > have different indexes, but idential remaining bits, they are noted as a repeat
> > genid, and we call rt_cache_invalidate to generate a new genid and avoid the
> > duplication problem.
> >
> >
>
> This is not necessary and over engineering if you ask me.
>
I can't say I disagree, but I was looking at this change based on your
suggestion.
> You now rely on probabilistic rules, and depends on get_random_bytes()
> be really random, or a new CONFIG setting...
>
> What exact problem do you want to solve Neil ?
>
You know good and well what I'm trying to do here, don't be thick. The only
reason I was making changes to the genid in the first place was because you were
asking for them. I'm more than happy to make a simpler version of this.
Apologies for not interpreting your previous request the way you had intended.
> Please submit your initial patch, with the small changes :
>
> 1) Remove the secret_interval binary interface entry in the list
>
> 2) Initialize full 32bits at struct net init time.
>
Yeah, ok. I'll repost on monday.
Thanks
Neil
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer (v4)
2010-05-06 17:16 [PATCH] ipv4: remove ip_rt_secret timer Neil Horman
` (2 preceding siblings ...)
2010-05-07 19:55 ` [PATCH] ipv4: remove ip_rt_secret timer (v3) Neil Horman
@ 2010-05-08 1:01 ` Neil Horman
2010-05-08 6:36 ` Eric Dumazet
3 siblings, 1 reply; 17+ messages in thread
From: Neil Horman @ 2010-05-08 1:01 UTC (permalink / raw)
To: netdev; +Cc: davem, kuznet, jmorris, yoshfuji, kaber
Hey, Had a moment tonight so I spun version 4 of the patch.
Change notes:
1) Redid the initialization, in light of Erics clarification. We randomly seed
all 32 bits of the rt_genid for a netns, but still just do 256 byte
modifications on cache invalidations
2) got rid of the dup checking crap.
Summary:
A while back there was a discussion regarding the rt_secret_interval timer.
Given that we've had the ability to do emergency route cache rebuilds for awhile
now, based on a statistical analysis of the various hash chain lengths in the
cache, the use of the flush timer is somewhat redundant. This patch removes the
rt_secret_interval sysctl, allowing us to rely solely on the statistical
analysis mechanism to determine the need for route cache flushes.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
include/net/netns/ipv4.h | 1
kernel/sysctl_binary.c | 1
net/ipv4/route.c | 108 +++--------------------------------------------
3 files changed, 8 insertions(+), 102 deletions(-)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ae07fee..d68c3f1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -55,7 +55,6 @@ struct netns_ipv4 {
int sysctl_rt_cache_rebuild_count;
int current_rt_cache_rebuild_count;
- struct timer_list rt_secret_timer;
atomic_t rt_genid;
#ifdef CONFIG_IP_MROUTE
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 5903057..937d31d 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -224,7 +224,6 @@ static const struct bin_table bin_net_ipv4_route_table[] = {
{ CTL_INT, NET_IPV4_ROUTE_MTU_EXPIRES, "mtu_expires" },
{ CTL_INT, NET_IPV4_ROUTE_MIN_PMTU, "min_pmtu" },
{ CTL_INT, NET_IPV4_ROUTE_MIN_ADVMSS, "min_adv_mss" },
- { CTL_INT, NET_IPV4_ROUTE_SECRET_INTERVAL, "secret_interval" },
{}
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a947428..dea3f92 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly = 8;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
-static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ;
static int rt_chain_length_max __read_mostly = 20;
static struct delayed_work expires_work;
@@ -918,32 +917,11 @@ void rt_cache_flush_batch(void)
rt_do_flush(!in_softirq());
}
-/*
- * We change rt_genid and let gc do the cleanup
- */
-static void rt_secret_rebuild(unsigned long __net)
-{
- struct net *net = (struct net *)__net;
- rt_cache_invalidate(net);
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
-static void rt_secret_rebuild_oneshot(struct net *net)
-{
- del_timer_sync(&net->ipv4.rt_secret_timer);
- rt_cache_invalidate(net);
- if (ip_rt_secret_interval)
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
static void rt_emergency_hash_rebuild(struct net *net)
{
- if (net_ratelimit()) {
+ if (net_ratelimit())
printk(KERN_WARNING "Route hash chain too long!\n");
- printk(KERN_WARNING "Adjust your secret_interval!\n");
- }
-
- rt_secret_rebuild_oneshot(net);
+ rt_cache_invalidate(net);
}
/*
@@ -3101,48 +3079,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
return -EINVAL;
}
-static void rt_secret_reschedule(int old)
-{
- struct net *net;
- int new = ip_rt_secret_interval;
- int diff = new - old;
-
- if (!diff)
- return;
-
- rtnl_lock();
- for_each_net(net) {
- int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
- long time;
-
- if (!new)
- continue;
-
- if (deleted) {
- time = net->ipv4.rt_secret_timer.expires - jiffies;
-
- if (time <= 0 || (time += diff) <= 0)
- time = 0;
- } else
- time = new;
-
- mod_timer(&net->ipv4.rt_secret_timer, jiffies + time);
- }
- rtnl_unlock();
-}
-
-static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
-{
- int old = ip_rt_secret_interval;
- int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
-
- rt_secret_reschedule(old);
-
- return ret;
-}
-
static ctl_table ipv4_route_table[] = {
{
.procname = "gc_thresh",
@@ -3251,13 +3187,6 @@ static ctl_table ipv4_route_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
- .procname = "secret_interval",
- .data = &ip_rt_secret_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = ipv4_sysctl_rt_secret_interval,
- },
{ }
};
@@ -3336,34 +3265,15 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
};
#endif
-
-static __net_init int rt_secret_timer_init(struct net *net)
+static __net_init int rt_genid_init(struct net *net)
{
- atomic_set(&net->ipv4.rt_genid,
- (int) ((num_physpages ^ (num_physpages>>8)) ^
- (jiffies ^ (jiffies >> 7))));
-
- net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
- net->ipv4.rt_secret_timer.data = (unsigned long)net;
- init_timer_deferrable(&net->ipv4.rt_secret_timer);
-
- if (ip_rt_secret_interval) {
- net->ipv4.rt_secret_timer.expires =
- jiffies + net_random() % ip_rt_secret_interval +
- ip_rt_secret_interval;
- add_timer(&net->ipv4.rt_secret_timer);
- }
+ get_random_bytes(&net->ipv4.rt_genid,
+ sizeof(net->ipv4.rt_genid));
return 0;
}
-static __net_exit void rt_secret_timer_exit(struct net *net)
-{
- del_timer_sync(&net->ipv4.rt_secret_timer);
-}
-
-static __net_initdata struct pernet_operations rt_secret_timer_ops = {
- .init = rt_secret_timer_init,
- .exit = rt_secret_timer_exit,
+static __net_initdata struct pernet_operations rt_genid_ops = {
+ .init = rt_genid_init,
};
@@ -3424,9 +3334,6 @@ int __init ip_rt_init(void)
schedule_delayed_work(&expires_work,
net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
- if (register_pernet_subsys(&rt_secret_timer_ops))
- printk(KERN_ERR "Unable to setup rt_secret_timer\n");
-
if (ip_rt_proc_init())
printk(KERN_ERR "Unable to create route proc files\n");
#ifdef CONFIG_XFRM
@@ -3438,6 +3345,7 @@ int __init ip_rt_init(void)
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&sysctl_route_ops);
#endif
+ register_pernet_subsys(&rt_genid_ops);
return rc;
}
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer (v4)
2010-05-08 1:01 ` [PATCH] ipv4: remove ip_rt_secret timer (v4) Neil Horman
@ 2010-05-08 6:36 ` Eric Dumazet
2010-05-08 8:58 ` David Miller
2010-05-08 12:54 ` Neil Horman
0 siblings, 2 replies; 17+ messages in thread
From: Eric Dumazet @ 2010-05-08 6:36 UTC (permalink / raw)
To: Neil Horman; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
Le vendredi 07 mai 2010 à 21:01 -0400, Neil Horman a écrit :
> Hey, Had a moment tonight so I spun version 4 of the patch.
>
> Change notes:
> 1) Redid the initialization, in light of Erics clarification. We randomly seed
> all 32 bits of the rt_genid for a netns, but still just do 256 byte
> modifications on cache invalidations
>
> 2) got rid of the dup checking crap.
>
>
> Summary:
>
> A while back there was a discussion regarding the rt_secret_interval timer.
> Given that we've had the ability to do emergency route cache rebuilds for awhile
> now, based on a statistical analysis of the various hash chain lengths in the
> cache, the use of the flush timer is somewhat redundant. This patch removes the
> rt_secret_interval sysctl, allowing us to rely solely on the statistical
> analysis mechanism to determine the need for route cache flushes.
>
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Sorry for the confusion Neil, thanks !
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer (v4)
2010-05-08 6:36 ` Eric Dumazet
@ 2010-05-08 8:58 ` David Miller
2010-05-08 12:54 ` Neil Horman
1 sibling, 0 replies; 17+ messages in thread
From: David Miller @ 2010-05-08 8:58 UTC (permalink / raw)
To: eric.dumazet; +Cc: nhorman, netdev, kuznet, jmorris, yoshfuji, kaber
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 08 May 2010 08:36:37 +0200
> Le vendredi 07 mai 2010 à 21:01 -0400, Neil Horman a écrit :
>> A while back there was a discussion regarding the rt_secret_interval timer.
>> Given that we've had the ability to do emergency route cache rebuilds for awhile
>> now, based on a statistical analysis of the various hash chain lengths in the
>> cache, the use of the flush timer is somewhat redundant. This patch removes the
>> rt_secret_interval sysctl, allowing us to rely solely on the statistical
>> analysis mechanism to determine the need for route cache flushes.
>>
>> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
>>
>
> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Applied, thanks guys!
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH] ipv4: remove ip_rt_secret timer (v4)
2010-05-08 6:36 ` Eric Dumazet
2010-05-08 8:58 ` David Miller
@ 2010-05-08 12:54 ` Neil Horman
1 sibling, 0 replies; 17+ messages in thread
From: Neil Horman @ 2010-05-08 12:54 UTC (permalink / raw)
To: Eric Dumazet; +Cc: netdev, davem, kuznet, jmorris, yoshfuji, kaber
On Sat, May 08, 2010 at 08:36:37AM +0200, Eric Dumazet wrote:
> Le vendredi 07 mai 2010 à 21:01 -0400, Neil Horman a écrit :
> > Hey, Had a moment tonight so I spun version 4 of the patch.
> >
> > Change notes:
> > 1) Redid the initialization, in light of Erics clarification. We randomly seed
> > all 32 bits of the rt_genid for a netns, but still just do 256 byte
> > modifications on cache invalidations
> >
> > 2) got rid of the dup checking crap.
> >
> >
> > Summary:
> >
> > A while back there was a discussion regarding the rt_secret_interval timer.
> > Given that we've had the ability to do emergency route cache rebuilds for awhile
> > now, based on a statistical analysis of the various hash chain lengths in the
> > cache, the use of the flush timer is somewhat redundant. This patch removes the
> > rt_secret_interval sysctl, allowing us to rely solely on the statistical
> > analysis mechanism to determine the need for route cache flushes.
> >
> > Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> >
>
> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
>
> Sorry for the confusion Neil, thanks !
Its my fault, it was more clear to me on a second reading. Apologies!
Neil
>
>
>
^ permalink raw reply [flat|nested] 17+ messages in thread
end of thread, other threads:[~2010-05-08 12:55 UTC | newest]
Thread overview: 17+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-06 17:16 [PATCH] ipv4: remove ip_rt_secret timer Neil Horman
2010-05-06 17:32 ` Eric Dumazet
2010-05-06 18:02 ` Neil Horman
2010-05-06 18:33 ` Eric Dumazet
2010-05-06 19:54 ` Neil Horman
2010-05-06 20:10 ` Eric Dumazet
2010-05-06 20:25 ` Neil Horman
2010-05-06 20:29 ` [PATCH] ipv4: remove ip_rt_secret timer (v2) Neil Horman
2010-05-06 21:08 ` Eric Dumazet
2010-05-07 0:02 ` nhorman
2010-05-07 19:55 ` [PATCH] ipv4: remove ip_rt_secret timer (v3) Neil Horman
2010-05-07 21:04 ` Eric Dumazet
2010-05-07 23:15 ` Neil Horman
2010-05-08 1:01 ` [PATCH] ipv4: remove ip_rt_secret timer (v4) Neil Horman
2010-05-08 6:36 ` Eric Dumazet
2010-05-08 8:58 ` David Miller
2010-05-08 12:54 ` Neil Horman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).