From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1763256AbYEHPRA (ORCPT ); Thu, 8 May 2008 11:17:00 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1754370AbYEHPQw (ORCPT ); Thu, 8 May 2008 11:16:52 -0400 Received: from gw.goop.org ([64.81.55.164]:58503 "EHLO mail.goop.org" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753649AbYEHPQu (ORCPT ); Thu, 8 May 2008 11:16:50 -0400 Message-ID: <48231959.4050406@goop.org> Date: Thu, 08 May 2008 16:16:41 +0100 From: Jeremy Fitzhardinge User-Agent: Thunderbird 2.0.0.14 (X11/20080501) MIME-Version: 1.0 To: Segher Boessenkool CC: Robert Hancock , Christian Kujau , LKML , Ingo Molnar , Thomas Gleixner , Andrew Morton , john stultz Subject: [PATCH] common implementation of iterative div/mod References: <481DF3D8.3010108@shaw.ca> <48217674.8080903@goop.org> In-Reply-To: <48217674.8080903@goop.org> X-Enigmail-Version: 0.95.6 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org We have a few instances of the open-coded iterative div/mod loop, used when we don't expect the dividend to be much bigger than the divisor. Unfortunately modern gcc's have the tendency to strength "reduce" this into a full mod operation, which isn't necessarily any faster, and even if it were, doesn't exist if gcc implements it in libgcc. The workaround is to put a dummy asm statement in the loop to prevent gcc from performing the transformation. This patch creates a single implementation of this loop, and uses it to replace the open-coded versions I know about. 
Signed-off-by: Jeremy Fitzhardinge Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Andrew Morton Cc: john stultz Cc: Segher Boessenkool Cc: Christian Kujau Cc: Robert Hancock --- arch/x86/xen/time.c | 13 +++---------- include/linux/math64.h | 2 ++ include/linux/time.h | 11 ++--------- lib/div64.c | 23 +++++++++++++++++++++++ 4 files changed, 30 insertions(+), 19 deletions(-) =================================================================== --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -150,11 +151,7 @@ static void do_stolen_accounting(void) if (stolen < 0) stolen = 0; - ticks = 0; - while (stolen >= NS_PER_TICK) { - ticks++; - stolen -= NS_PER_TICK; - } + ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); __get_cpu_var(residual_stolen) = stolen; account_steal_time(NULL, ticks); @@ -166,11 +163,7 @@ static void do_stolen_accounting(void) if (blocked < 0) blocked = 0; - ticks = 0; - while (blocked >= NS_PER_TICK) { - ticks++; - blocked -= NS_PER_TICK; - } + ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); __get_cpu_var(residual_blocked) = blocked; account_steal_time(idle_task(smp_processor_id()), ticks); } =================================================================== --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -81,4 +81,6 @@ static inline s64 div_s64(s64 dividend, } #endif +unsigned iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder); + #endif /* _LINUX_MATH64_H */ =================================================================== --- a/include/linux/time.h +++ b/include/linux/time.h @@ -2,6 +2,7 @@ #define _LINUX_TIME_H #include +#include #ifdef __KERNEL__ # include @@ -172,15 +173,7 @@ extern struct timeval ns_to_timeval(cons */ static inline void timespec_add_ns(struct timespec *a, u64 ns) { - ns += a->tv_nsec; - while(unlikely(ns >= NSEC_PER_SEC)) { - /* The following asm() prevents the compiler from - * optimising this loop into a modulo 
operation. */ - asm("" : "+r"(ns)); - - ns -= NSEC_PER_SEC; - a->tv_sec++; - } + a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); a->tv_nsec = ns; } #endif /* __KERNEL__ */ =================================================================== --- a/lib/div64.c +++ b/lib/div64.c @@ -98,3 +98,26 @@ EXPORT_SYMBOL(div64_u64); #endif #endif /* BITS_PER_LONG == 32 */ + +/* + * Iterative div/mod for use when dividend is not expected to be much + * bigger than divisor. + */ +unsigned iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) +{ + unsigned ret = 0; + + while(dividend >= divisor) { + /* The following asm() prevents the compiler from + optimising this loop into a modulo operation. */ + asm("" : "+rm"(dividend)); + + dividend -= divisor; + ret++; + } + + *remainder = dividend; + + return ret; +} +EXPORT_SYMBOL(iter_div_u64_rem);