netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eric Dumazet <dada1@cosmosbay.com>
To: Neil Horman <nhorman@tuxdriver.com>
Cc: Bill Fink <billfink@mindspring.com>,
	David Miller <davem@davemloft.net>,
	netdev@vger.kernel.org, kuznet@ms2.inr.ac.ru, pekkas@netcore.fi,
	jmorris@namei.org, yoshfuji@linux-ipv6.org, kaber@trash.net,
	Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Subject: Re: [PATCH] net: implement emergency route cache rebulds when	gc_elasticity is exceeded
Date: Mon, 06 Oct 2008 12:49:33 +0200	[thread overview]
Message-ID: <48E9ED3D.2020005@cosmosbay.com> (raw)
In-Reply-To: <20081003203640.GA13354@hmsreliant.think-freely.org>

[-- Attachment #1: Type: text/plain, Size: 3369 bytes --]

Neil Horman a écrit :
> Hey all-
> 	So, I've been doing some testing here with this patch, and am
> comfortable that the sd estimation is working reasonably well. For a hash table
> with an average chain length of 1, it computes the stadard deviation to be 2,
> which gives us a max chain length of 9 (4*sd + avg), and it manages to do that
> in about 7 jiffies over about 524000 hash buckets.  I'm reasonably pleased with
> that speed I think, and after thinking about it, I like this implementation
> somewhat better, as it doesn't create a window in which chains can be
> artifically overrun (until the nect gc round) (although I'm happy to hear
> arguments against my implementation).  Anywho here it is, comments and thoughts
> welcome!
> 
> Thanks & Regards
> Neil
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 
> 
>  route.c |  121 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 118 insertions(+), 3 deletions(-)
> 
> 
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 6ee5354..4f8c5b5 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -145,6 +145,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
>  static void		 ipv4_link_failure(struct sk_buff *skb);
>  static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
>  static int rt_garbage_collect(struct dst_ops *ops);
> +static void rt_emergency_hash_rebuild(struct net *net);
>  
>  
>  static struct dst_ops ipv4_dst_ops = {
> @@ -200,7 +201,14 @@ const __u8 ip_tos2prio[16] = {
>  
>  struct rt_hash_bucket {
>  	struct rtable	*chain;
> +	atomic_t	chain_length;
>  };
> +
> +atomic_t rt_hash_total_count;
> +atomic_t rt_hash_nz_count;
> +
> +static int rt_chain_length_max;
> +
>  #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
>  	defined(CONFIG_PROVE_LOCKING)
>  /*
> @@ -601,6 +609,68 @@ static inline int ip_rt_proc_init(void)
>  }
>  #endif /* CONFIG_PROC_FS */
>  
> +static void rt_hash_sd_update(void)
> +{
> +	int temp, i;
> +	unsigned long long sd;
> +	int average = atomic_read(&rt_hash_total_count);
> +	int nzcount = atomic_read(&rt_hash_nz_count);
> +
> +	/*
> + 	 * Don't divide by zero
> + 	 */
> +	if (!nzcount)
> +		return;
> +
> +	average = DIV_ROUND_UP(average, nzcount);
> +
> +	sd = 0;
> +	for (i = 0; i < (1 << rt_hash_log); i++) {
> +		temp = atomic_read(&rt_hash_table[i].chain_length);
> +		/*
> + 		 * Don't count zero entries, as most of the table
> + 		 * will likely be empty.  We don't want to unfairly
> + 		 * bias our average chain length down so far
> + 		 */

Empty chains should be accounted for, or average and standard
deviation are not correct.

> +		if (unlikely(temp))
> +			sd += (temp-average)^2;

Out of curiosity, what do you expect to do here ?

(temp-average) XOR 2
or 
(temp-average) * (temp-average) 

Also, your computations use integer arithmetic.

If avg = 2.5 and sd = 1.9, avg+4*sd you'll find 6 instead of 10 

Anyway, we wont add so many atomic operations and double
hash table size in order to be able to compute sd.

If we really want to be smart, we can have a pretty good
 estimate of average and sd for free in rt_check_expire()

Something like this untested patch. (We should make sure
we dont overflow sum2 for example)


[-- Attachment #2: avg_sd.patch --]
[-- Type: text/plain, Size: 2353 bytes --]

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6ee5354..85182d9 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -125,6 +125,7 @@ static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
 static int ip_rt_error_cost __read_mostly	= HZ;
 static int ip_rt_error_burst __read_mostly	= 5 * HZ;
 static int ip_rt_gc_elasticity __read_mostly	= 8;
+static int rt_chain_length_max __read_mostly    = 32;
 static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
@@ -748,11 +749,24 @@ static void rt_do_flush(int process_context)
 	}
 }
 
+/*
+ * While freeing expired entries, we compute average chain length
+ * and standard deviation, using fixed-point arithmetic.
+ * This to have an estimation of rt_chain_length_max
+ *  rt_chain_length_max = max(elasticity, AVG + 4*SD)
+ * We use 3 bits for frational part, and 29 (or 61) for magnitude.
+ */
+
+#define FRACT_BITS 3
+#define ONE (1UL << FRACT_BITS)
+
 static void rt_check_expire(void)
 {
 	static unsigned int rover;
 	unsigned int i = rover, goal;
 	struct rtable *rth, **rthp;
+	unsigned long sum = 0, sum2 = 0;
+	unsigned long length, samples = 0;
 	u64 mult;
 
 	mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
@@ -770,8 +784,10 @@ static void rt_check_expire(void)
 		if (need_resched())
 			cond_resched();
 
+		samples++;
 		if (*rthp == NULL)
 			continue;
+		length = 0;
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
 			if (rt_is_expired(rth)) {
@@ -784,11 +800,13 @@ static void rt_check_expire(void)
 				if (time_before_eq(jiffies, rth->u.dst.expires)) {
 					tmo >>= 1;
 					rthp = &rth->u.dst.rt_next;
+					length += ONE;
 					continue;
 				}
 			} else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
 				tmo >>= 1;
 				rthp = &rth->u.dst.rt_next;
+				length += ONE;
 				continue;
 			}
 
@@ -797,6 +815,15 @@ static void rt_check_expire(void)
 			rt_free(rth);
 		}
 		spin_unlock_bh(rt_hash_lock_addr(i));
+		sum += length;
+		sum2 += length*length;
+	}
+	if (samples) {
+		unsigned long avg = sum / samples;
+		unsigned long sd = int_sqrt(sum2 / samples - avg*avg);
+		rt_chain_length_max = max_t(unsigned long,
+					    ip_rt_gc_elasticity,
+					    (avg + 4*sd) >> FRACT_BITS);
 	}
 	rover = i;
 }

  reply	other threads:[~2008-10-06 11:29 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-29 19:12 [PATCH] net: implement emergency route cache rebulds when gc_elasticity is exceeded Neil Horman
2008-09-29 20:22 ` Eric Dumazet
2008-09-29 20:27   ` Neil Horman
2008-09-29 21:00     ` Eric Dumazet
2008-09-29 22:38       ` Neil Horman
2008-09-30  6:02         ` Eric Dumazet
2008-09-30 11:23           ` Neil Horman
2008-09-30 14:10           ` David Miller
2008-09-30 17:16             ` Eric Dumazet
2008-09-30 18:42               ` Neil Horman
2008-10-02  7:16                 ` Evgeniy Polyakov
2008-10-02 13:14                   ` Neil Horman
2008-10-01 18:08               ` Neil Horman
2008-10-02  5:01                 ` Bill Fink
2008-10-02  6:56                   ` Eric Dumazet
2008-10-02  8:15                     ` Eric Dumazet
2008-10-02 14:20                       ` Eric Dumazet
2008-10-03  0:31                       ` Neil Horman
2008-10-03 20:36                         ` Neil Horman
2008-10-06 10:49                           ` Eric Dumazet [this message]
2008-10-06 13:14                             ` Neil Horman
2008-10-06 20:54                             ` Neil Horman
2008-10-06 21:21                               ` Eric Dumazet
2008-10-06 22:52                                 ` Neil Horman
2008-10-07  5:13                                   ` Eric Dumazet
2008-10-07 10:54                                     ` Neil Horman
2008-10-13 18:26                                     ` Neil Horman
2008-10-16  6:55                                       ` David Miller
2008-10-16  9:19                                         ` Eric Dumazet
2008-10-16 21:18                                           ` David Miller
2008-10-16 11:41                                         ` Neil Horman
2008-10-16 12:25                                           ` Eric Dumazet
2008-10-16 16:36                                             ` Neil Horman
2008-10-16 23:35                                               ` Neil Horman
2008-10-17  4:53                                                 ` Eric Dumazet
2008-10-17  5:23                                                   ` David Miller
2008-10-17  5:03                                                 ` Stephen Hemminger
2008-10-17  5:06                                                 ` Stephen Hemminger
2008-10-17 10:39                                                   ` Neil Horman
     [not found]                                                     ` <48F8806A.6090306@cosmosbay.com>
     [not found]                                                       ` <20081017152328.GB23591@hmsreliant.think-freely.org>
     [not found]                                                         ` <48F8AFBE.5080503@cosmosbay.com>
2008-10-17 20:44                                                           ` Neil Horman
2008-10-18  0:54                                                             ` Neil Horman
2008-10-18  4:36                                                               ` Eric Dumazet
2008-10-18 13:30                                                                 ` Neil Horman
2008-10-20  0:07                                                                 ` Neil Horman
2008-10-20  8:12                                                                   ` Eric Dumazet
2008-10-27 19:28                                                                     ` David Miller
2008-10-02  7:13               ` Evgeniy Polyakov
2008-09-30 14:08   ` David Miller
2008-09-30 14:08 ` David Miller
2008-09-30 17:47   ` Eric Dumazet
2008-10-05  3:26   ` Herbert Xu
2008-10-05  4:45     ` Andrew Dickinson
2008-10-05 17:34       ` David Miller
2008-10-05 18:06         ` Andrew Dickinson
2008-10-06  4:21         ` Herbert Xu
2008-10-06 10:50           ` Neil Horman
2008-10-06 11:02             ` Herbert Xu
2008-10-06 12:43               ` Neil Horman
2008-09-30 14:17 ` Denis V. Lunev
2008-09-30 14:35   ` Neil Horman
2008-09-30 14:49     ` Denis V. Lunev
2008-10-05  3:17 ` Herbert Xu
2008-10-05  3:20   ` Herbert Xu
2008-10-06  0:52     ` Neil Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48E9ED3D.2020005@cosmosbay.com \
    --to=dada1@cosmosbay.com \
    --cc=billfink@mindspring.com \
    --cc=davem@davemloft.net \
    --cc=jmorris@namei.org \
    --cc=johnpol@2ka.mipt.ru \
    --cc=kaber@trash.net \
    --cc=kuznet@ms2.inr.ac.ru \
    --cc=netdev@vger.kernel.org \
    --cc=nhorman@tuxdriver.com \
    --cc=pekkas@netcore.fi \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).