* [PATCH net-next] codel: use Newton method instead of sqrt() and divides @ 2012-05-12 13:32 Eric Dumazet 2012-05-12 19:52 ` David Miller 0 siblings, 1 reply; 8+ messages in thread From: Eric Dumazet @ 2012-05-12 13:32 UTC (permalink / raw) To: David Miller Cc: Dave Taht, netdev, Kathleen Nichols, Van Jacobson, codel, Yuchung Cheng, Matt Mathis, Tom Herbert, Stephen Hemminger, Nandita Dukkipati From: Eric Dumazet <edumazet@google.com> As Van pointed out, interval/sqrt(count) can be implemented using multiplies only. http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots This patch implements the Newton method and reciprocal divide. Total cost is 15 cycles instead of 120 on my Corei5 machine (64bit kernel). There is a small 'error' for count values < 5, but we don't really care. I reuse a hole in struct codel_vars : - pack the dropping boolean into one bit - use 31bit to store the reciprocal value of sqrt(count). Suggested-by: Van Jacobson <van@pollere.net> Signed-off-by: Eric Dumazet <edumazet@google.com> Cc: Dave Taht <dave.taht@bufferbloat.net> Cc: Kathleen Nichols <nichols@pollere.com> Cc: Tom Herbert <therbert@google.com> Cc: Matt Mathis <mattmathis@google.com> Cc: Yuchung Cheng <ycheng@google.com> Cc: Nandita Dukkipati <nanditad@google.com> Cc: Stephen Hemminger <shemminger@vyatta.com> --- include/net/codel.h | 68 ++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/include/net/codel.h b/include/net/codel.h index bce2cef..bd8747c 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -46,6 +46,7 @@ #include <linux/skbuff.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> +#include <linux/reciprocal_div.h> /* Controlling Queue Delay (CoDel) algorithm * ========================================= @@ -123,6 +124,7 @@ struct codel_params { * entered dropping state * @lastcount: count at entry to dropping state * @dropping: set to true if in dropping state + * @rec_inv_sqrt: reciprocal value of sqrt(count) >> 1 * @first_above_time: when we went (or will go) continuously above target * for interval * @drop_next: time to drop next packet, or when we dropped last @@ -131,7 +133,8 @@ struct codel_params { struct codel_vars { u32 count; u32 lastcount; - bool dropping; + bool dropping:1; + u32 rec_inv_sqrt:31; codel_time_t first_above_time; codel_time_t drop_next; codel_time_t ldelay; @@ -158,11 +161,7 @@ static void codel_params_init(struct codel_params *params) static void codel_vars_init(struct codel_vars *vars) { - vars->drop_next = 0; - vars->first_above_time = 0; - vars->dropping = false; /* exit dropping state */ - vars->count = 0; - vars->lastcount = 0; + memset(vars, 0, sizeof(*vars)); } static void codel_stats_init(struct codel_stats *stats) @@ -170,38 +169,37 @@ static void codel_stats_init(struct codel_stats *stats) stats->maxpacket = 256; } -/* return interval/sqrt(x) with good precision - * relies on int_sqrt(unsigned long x) kernel implementation +/* + * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots + * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) + * + * Here, invsqrt is a fixed point number (< 1.0), 31bit mantissa) */ -static u32 codel_inv_sqrt(u32 _interval, u32 _x) +static void codel_Newton_step(struct codel_vars *vars) { - u64 interval = _interval; - unsigned long x = _x; + u32 invsqrt = vars->rec_inv_sqrt; + u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 31; + u64 val = (3LL << 31) - ((u64)vars->count * invsqrt2); - /* Scale operands for max precision */ - -#if BITS_PER_LONG == 64 - x <<= 32; /* On 64bit arches, we can prescale x by 32bits */ - interval <<= 16; -#endif + val = (val * invsqrt) >> 32; - while (x < (1UL << (BITS_PER_LONG - 2))) { - x <<= 2; - interval <<= 1; - } - do_div(interval, int_sqrt(x)); - return (u32)interval; + vars->rec_inv_sqrt = val; } +/* + * CoDel control_law is t + interval/sqrt(count) + * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid + * both sqrt() and divide operation. + */ static codel_time_t codel_control_law(codel_time_t t, codel_time_t interval, - u32 count) + u32 rec_inv_sqrt) { - return t + codel_inv_sqrt(interval, count); + return t + reciprocal_divide(interval, rec_inv_sqrt << 1); } -static bool codel_should_drop(struct sk_buff *skb, +static bool codel_should_drop(const struct sk_buff *skb, unsigned int *backlog, struct codel_vars *vars, struct codel_params *params, @@ -274,14 +272,16 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, */ while (vars->dropping && codel_time_after_eq(now, vars->drop_next)) { - if (++vars->count == 0) /* avoid zero divides */ - vars->count = ~0U; + vars->count++; /* dont care of possible wrap + * since there is no more divide + */ + codel_Newton_step(vars); if (params->ecn && INET_ECN_set_ce(skb)) { stats->ecn_mark++; vars->drop_next = codel_control_law(vars->drop_next, params->interval, - vars->count); + vars->rec_inv_sqrt); goto end; } qdisc_drop(skb, sch); @@ -296,7 +296,7 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, vars->drop_next = codel_control_law(vars->drop_next, params->interval, - vars->count); + vars->rec_inv_sqrt); } } } @@ -319,12 +319,18 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, if (codel_time_before(now - vars->drop_next, 16 * params->interval)) { vars->count = (vars->count - vars->lastcount) | 1; + /* we dont care if rec_inv_sqrt approximation + * is not very precise : + * Next Newton steps will correct it quadratically. + */ + codel_Newton_step(vars); } else { vars->count = 1; + vars->rec_inv_sqrt = 0x7fffffff; } vars->lastcount = vars->count; vars->drop_next = codel_control_law(now, params->interval, - vars->count); + vars->rec_inv_sqrt); } end: return skb; ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH net-next] codel: use Newton method instead of sqrt() and divides 2012-05-12 13:32 [PATCH net-next] codel: use Newton method instead of sqrt() and divides Eric Dumazet @ 2012-05-12 19:52 ` David Miller 2012-05-12 20:40 ` Eric Dumazet 0 siblings, 1 reply; 8+ messages in thread From: David Miller @ 2012-05-12 19:52 UTC (permalink / raw) To: eric.dumazet Cc: dave.taht, netdev, nichols, van, codel, ycheng, mattmathis, therbert, shemminger, nanditad From: Eric Dumazet <eric.dumazet@gmail.com> Date: Sat, 12 May 2012 15:32:13 +0200 > From: Eric Dumazet <edumazet@google.com> > > As Van pointed out, interval/sqrt(count) can be implemented using > multiplies only. > > http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots > > This patch implements the Newton method and reciprocal divide. > > Total cost is 15 cycles instead of 120 on my Corei5 machine (64bit > kernel). > > There is a small 'error' for count values < 5, but we don't really care. > > I reuse a hole in struct codel_vars : > - pack the dropping boolean into one bit > - use 31bit to store the reciprocal value of sqrt(count). > > Suggested-by: Van Jacobson <van@pollere.net> > Signed-off-by: Eric Dumazet <edumazet@google.com> Applied but I never like that bitfield sharing for real integers. GCC makes a complete mess of it as it extracts and inserts the integer value into that bit field. You are guarenteed to get better code if you do this by hand in a full u32. Either that or just bite the bullet and use a completely seperate field, maybe we'll need more boolean states later. ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH net-next] codel: use Newton method instead of sqrt() and divides 2012-05-12 19:52 ` David Miller @ 2012-05-12 20:40 ` Eric Dumazet 2012-05-12 20:45 ` David Miller 0 siblings, 1 reply; 8+ messages in thread From: Eric Dumazet @ 2012-05-12 20:40 UTC (permalink / raw) To: David Miller Cc: dave.taht, netdev, nichols, van, codel, ycheng, mattmathis, therbert, shemminger, nanditad On Sat, 2012-05-12 at 15:52 -0400, David Miller wrote: > Applied but I never like that bitfield sharing for real integers. > > GCC makes a complete mess of it as it extracts and inserts the > integer value into that bit field. You are guarenteed to get > better code if you do this by hand in a full u32. > > Either that or just bite the bullet and use a completely seperate > field, maybe we'll need more boolean states later. I couldnt use a full u32 or else fq_codel cell was > 64 bytes (or I would have to remove the 'dropped' field) 24 bit of precision for the reciprocal value is more than enough (Van suggested 16 bits in fact), so we have actually room for 7 bits if needed. By the way, gcc on x86 generates nice "and 0xfffffffe,%eax" instruction for (vars->rec_inv_sqrt << 1). Thanks ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH net-next] codel: use Newton method instead of sqrt() and divides 2012-05-12 20:40 ` Eric Dumazet @ 2012-05-12 20:45 ` David Miller 2012-05-12 21:48 ` Eric Dumazet 0 siblings, 1 reply; 8+ messages in thread From: David Miller @ 2012-05-12 20:45 UTC (permalink / raw) To: eric.dumazet Cc: dave.taht, netdev, nichols, van, codel, ycheng, mattmathis, therbert, shemminger, nanditad From: Eric Dumazet <eric.dumazet@gmail.com> Date: Sat, 12 May 2012 22:40:56 +0200 > 24 bit of precision for the reciprocal value is more than enough (Van > suggested 16 bits in fact), so we have actually room for 7 bits if > needed. Using a u16 would also work for me. > By the way, gcc on x86 generates nice "and 0xfffffffe,%eax" instruction > for (vars->rec_inv_sqrt << 1). Yeah but what do stores of ->rec_inv_sqrt look like? ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH net-next] codel: use Newton method instead of sqrt() and divides 2012-05-12 20:45 ` David Miller @ 2012-05-12 21:48 ` Eric Dumazet 2012-05-12 21:52 ` David Miller 0 siblings, 1 reply; 8+ messages in thread From: Eric Dumazet @ 2012-05-12 21:48 UTC (permalink / raw) To: David Miller Cc: dave.taht, netdev, nichols, van, codel, ycheng, mattmathis, therbert, shemminger, nanditad On Sat, 2012-05-12 at 16:45 -0400, David Miller wrote: > From: Eric Dumazet <eric.dumazet@gmail.com> > Date: Sat, 12 May 2012 22:40:56 +0200 > > > 24 bit of precision for the reciprocal value is more than enough (Van > > suggested 16 bits in fact), so we have actually room for 7 bits if > > needed. > > Using a u16 would also work for me. I tried it but it gives noticeable errors for count > 16000, and no speed gain. count=16525 scale=0 sqrt(scaled_count)=2056 reciprocal(sq)=1fe0200 Newton=235 interval/sqrt(16525) = 777909 (float compute) 778210 (integer approx) 777926 (int_sqrt_div) 862121 (Newton approx) And if a flow is really agressive, count can grow above 10^6 > > > By the way, gcc on x86 generates nice "and 0xfffffffe,%eax" instruction > > for (vars->rec_inv_sqrt << 1). > > Yeah but what do stores of ->rec_inv_sqrt look like? The load is "shr %edi" as in : and the store an "or %ecx,%esi" 5f2: 8b 72 08 mov 0x8(%rdx),%esi 5f5: 44 8b 02 mov (%rdx),%r8d 5f8: 89 f7 mov %esi,%edi 5fa: 41 83 c0 01 add $0x1,%r8d vars->count + 1 5fe: 83 e6 01 and $0x1,%esi vars->dropping in esi 601: d1 ef shr %edi 603: 44 89 02 mov %r8d,(%rdx) vars->count++; 606: 45 89 c0 mov %r8d,%r8d 609: 89 ff mov %edi,%edi 60b: 48 89 f9 mov %rdi,%rcx 60e: 48 0f af cf imul %rdi,%rcx 612: 48 c1 e9 1f shr $0x1f,%rcx 616: 49 0f af c8 imul %r8,%rcx 61a: 49 b8 00 00 00 80 01 mov $0x180000000,%r8 621: 00 00 00 624: 49 29 c8 sub %rcx,%r8 627: 4c 89 c1 mov %r8,%rcx 62a: 48 0f af cf imul %rdi,%rcx 62e: 48 c1 e9 20 shr $0x20,%rcx 632: 01 c9 add %ecx,%ecx 634: 09 ce or %ecx,%esi combine the two fields 636: 89 72 08 mov %esi,0x8(%rdx) final store Using 24bits generates roughly same code. (constants are different) ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH net-next] codel: use Newton method instead of sqrt() and divides 2012-05-12 21:48 ` Eric Dumazet @ 2012-05-12 21:52 ` David Miller 2012-05-13 7:23 ` Eric Dumazet 0 siblings, 1 reply; 8+ messages in thread From: David Miller @ 2012-05-12 21:52 UTC (permalink / raw) To: eric.dumazet Cc: dave.taht, netdev, nichols, van, codel, ycheng, mattmathis, therbert, shemminger, nanditad From: Eric Dumazet <eric.dumazet@gmail.com> Date: Sat, 12 May 2012 23:48:44 +0200 > On Sat, 2012-05-12 at 16:45 -0400, David Miller wrote: >> Using a u16 would also work for me. > > I tried it but it gives noticeable errors for count > 16000, and no > speed gain. ... > And if a flow is really agressive, count can grow above 10^6 > >> > By the way, gcc on x86 generates nice "and 0xfffffffe,%eax" instruction >> > for (vars->rec_inv_sqrt << 1). >> >> Yeah but what do stores of ->rec_inv_sqrt look like? > > The load is "shr %edi" as in : > and the store an "or %ecx,%esi" Ok, fair enough. ^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH net-next] codel: use Newton method instead of sqrt() and divides 2012-05-12 21:52 ` David Miller @ 2012-05-13 7:23 ` Eric Dumazet 2012-05-14 22:33 ` David Miller 0 siblings, 1 reply; 8+ messages in thread From: Eric Dumazet @ 2012-05-13 7:23 UTC (permalink / raw) To: David Miller Cc: dave.taht, netdev, nichols, van, codel, ycheng, mattmathis, therbert, shemminger, nanditad From: Eric Dumazet <edumazet@google.com> On Sat, 2012-05-12 at 17:52 -0400, David Miller wrote: > Ok, fair enough. Oh well, I sent my mail too late. The error made no sense after a good night. Also, when Van says something, you can be fairly sure its right, and if it's not, then you didn't understand what Van said ;) 16bit precision is OK, once the maths are correctly done in the userland program I wrote yesterday... count=16525, precision=16 bits, sqrt(scaled_count)=4113, reciprocal(sq)=1fde240, Newton=1fd0000 interval/sqrt(16525) = 777909 (float compute) // (u32)(interval/sqrt(count)) 778020 (integer approx) // reciprocal_divide(interval, rec) 777926 (int_sqrt_div) // int_sqrt_div(interval, count) 776672 (Newton approx) // reciprocal_divide(interval, previnv << shift) count=9889134, precision=16 bits, sqrt(scaled_count)=50315, reciprocal(sq)=14d720, Newton=140000 interval/sqrt(9889134) = 31799 (float compute) 31799 (integer approx) 31799 (int_sqrt_div) 30517 (Newton approx) And kernel code using u16 : 6a1: 0f b7 72 0a movzwl 0xa(%rdx),%esi 6a5: 8b 3a mov (%rdx),%edi 6a7: 83 c7 01 add $0x1,%edi 6aa: c1 e6 10 shl $0x10,%esi 6ad: 89 3a mov %edi,(%rdx) vars->count++ 6af: 89 ff mov %edi,%edi 6b1: 89 f6 mov %esi,%esi 6b3: 48 89 f1 mov %rsi,%rcx 6b6: 48 0f af ce imul %rsi,%rcx 6ba: 48 c1 e9 20 shr $0x20,%rcx 6be: 48 0f af cf imul %rdi,%rcx 6c2: 48 bf 00 00 00 00 03 mov $0x300000000,%rdi 6c9: 00 00 00 6cc: 48 29 cf sub %rcx,%rdi 6cf: 48 89 f9 mov %rdi,%rcx 6d2: 48 c1 e9 02 shr $0x2,%rcx 6d6: 48 0f af ce imul %rsi,%rcx 6da: 48 c1 e9 2f shr $0x2f,%rcx 6de: 66 89 4a 0a mov %cx,0xa(%rdx) Fell free to add following cleanup patch, if you like it ;) Thanks [PATCH net-next] codel: use u16 field instead of 31bits for rec_inv_sqrt David pointed out gcc might generate poor code with 31bit fields. Using u16 is more than enough and permits a better code output. Also make the code intent more readable using constants, fixed point arithmetic not being trivial for everybody. Suggested-by: David Miller <davem@davemloft.net> Signed-off-by: Eric Dumazet <edumazet@google.com> --- include/net/codel.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/include/net/codel.h b/include/net/codel.h index bd8747c..7546517 100644 --- a/include/net/codel.h +++ b/include/net/codel.h @@ -133,13 +133,17 @@ struct codel_params { struct codel_vars { u32 count; u32 lastcount; - bool dropping:1; - u32 rec_inv_sqrt:31; + bool dropping; + u16 rec_inv_sqrt; codel_time_t first_above_time; codel_time_t drop_next; codel_time_t ldelay; }; +#define REC_INV_SQRT_BITS (8 * sizeof(u16)) /* or sizeof_in_bits(rec_inv_sqrt) */ +/* needed shift to get a Q0.32 number from rec_inv_sqrt */ +#define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS) + /** * struct codel_stats - contains codel shared variables and stats * @maxpacket: largest packet we've seen so far @@ -173,17 +177,18 @@ static void codel_stats_init(struct codel_stats *stats) * http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Iterative_methods_for_reciprocal_square_roots * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) * - * Here, invsqrt is a fixed point number (< 1.0), 31bit mantissa) + * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 */ static void codel_Newton_step(struct codel_vars *vars) { - u32 invsqrt = vars->rec_inv_sqrt; - u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 31; - u64 val = (3LL << 31) - ((u64)vars->count * invsqrt2); + u32 invsqrt = ((u32)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; + u32 invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; + u64 val = (3LL << 32) - ((u64)vars->count * invsqrt2); - val = (val * invsqrt) >> 32; + val >>= 2; /* avoid overflow in following multiply */ + val = (val * invsqrt) >> (32 - 2 + 1); - vars->rec_inv_sqrt = val; + vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; } /* @@ -195,7 +200,7 @@ static codel_time_t codel_control_law(codel_time_t t, codel_time_t interval, u32 rec_inv_sqrt) { - return t + reciprocal_divide(interval, rec_inv_sqrt << 1); + return t + reciprocal_divide(interval, rec_inv_sqrt << REC_INV_SQRT_SHIFT); } @@ -326,7 +331,7 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch, codel_Newton_step(vars); } else { vars->count = 1; - vars->rec_inv_sqrt = 0x7fffffff; + vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; } vars->lastcount = vars->count; vars->drop_next = codel_control_law(now, params->interval, ^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH net-next] codel: use Newton method instead of sqrt() and divides 2012-05-13 7:23 ` Eric Dumazet @ 2012-05-14 22:33 ` David Miller 0 siblings, 0 replies; 8+ messages in thread From: David Miller @ 2012-05-14 22:33 UTC (permalink / raw) To: eric.dumazet Cc: dave.taht, netdev, nichols, van, codel, ycheng, mattmathis, therbert, shemminger, nanditad From: Eric Dumazet <eric.dumazet@gmail.com> Date: Sun, 13 May 2012 09:23:23 +0200 > Fell free to add following cleanup patch, if you like it ;) ... > [PATCH net-next] codel: use u16 field instead of 31bits for rec_inv_sqrt I do, applied :-) ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2012-05-14 22:35 UTC | newest] Thread overview: 8+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-05-12 13:32 [PATCH net-next] codel: use Newton method instead of sqrt() and divides Eric Dumazet 2012-05-12 19:52 ` David Miller 2012-05-12 20:40 ` Eric Dumazet 2012-05-12 20:45 ` David Miller 2012-05-12 21:48 ` Eric Dumazet 2012-05-12 21:52 ` David Miller 2012-05-13 7:23 ` Eric Dumazet 2012-05-14 22:33 ` David Miller
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).