From: Randy Dunlap <rdunlap@infradead.org>
To: Alexey Brodkin <Alexey.Brodkin@synopsys.com>,
linux-kernel@vger.kernel.org
Cc: linux-snps-arc@lists.infradead.org,
Vineet Gupta <Vineet.Gupta1@synopsys.com>,
Ingo Molnar <mingo@elte.hu>,
Stephen Hemminger <shemminger@linux-foundation.org>,
"David S. Miller" <davem@davemloft.net>,
Nicolas Pitre <nico@cam.org>,
Russell King <rmk+kernel@arm.linux.org.uk>
Subject: Re: [PATCH] __div64_32: implement division by multiplication for 32-bit arches
Date: Thu, 29 Oct 2015 10:09:18 -0700 [thread overview]
Message-ID: <563252BE.9030506@infradead.org> (raw)
In-Reply-To: <1446072455-16074-1-git-send-email-abrodkin@synopsys.com>
typos (spellos):
On 10/28/15 15:47, Alexey Brodkin wrote:
> ---
> lib/div64.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 128 insertions(+), 25 deletions(-)
>
> diff --git a/lib/div64.c b/lib/div64.c
> index 62a698a..3055328 100644
> --- a/lib/div64.c
> +++ b/lib/div64.c
> @@ -23,37 +23,140 @@
> /* Not needed on 64bit architectures */
> #if BITS_PER_LONG == 32
>
> +
> +/*
> + * If the divisor happens to be constant, we determine the appropriate
> + * inverse at compile time to turn the division into a few inline
> + * multiplications instead which is much faster.
> + */
> uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
> {
> - uint64_t rem = *n;
> - uint64_t b = base;
> - uint64_t res, d = 1;
> - uint32_t high = rem >> 32;
> -
> - /* Reduce the thing a bit first */
> - res = 0;
> - if (high >= base) {
> - high /= base;
> - res = (uint64_t) high << 32;
> - rem -= (uint64_t) (high*base) << 32;
> - }
> + unsigned int __r, __b = base;
>
> - while ((int64_t)b > 0 && b < rem) {
> - b = b+b;
> - d = d+d;
> - }
> + if (!__builtin_constant_p(__b) || __b == 0) {
> + /* non-constant divisor (or zero): slow path */
> + uint64_t rem = *n;
> + uint64_t b = base;
> + uint64_t res, d = 1;
> + uint32_t high = rem >> 32;
> +
> + /* Reduce the thing a bit first */
> + res = 0;
> + if (high >= base) {
> + high /= base;
> + res = (uint64_t) high << 32;
> + rem -= (uint64_t) (high*base) << 32;
> + }
> +
> + while ((int64_t)b > 0 && b < rem) {
> + b = b+b;
> + d = d+d;
> + }
> +
> + do {
> + if (rem >= b) {
> + rem -= b;
> + res += d;
> + }
> + b >>= 1;
> + d >>= 1;
> + } while (d);
>
> - do {
> - if (rem >= b) {
> - rem -= b;
> - res += d;
> + *n = res;
> + __r = rem;
> + } else if ((__b & (__b - 1)) == 0) {
> + /*
> + * Trivial: __b is constant and a power of 2
> + * gcc does the right thing with this code.
> + * Even though code is the same as above but
> + * we make it visually as a separate path.
> + * Still only one of these branches will survive
> + * pre-processor stage, so let's leave it here.
> + */
> + __r = *n;
> + __r &= (__b - 1);
> + *n /= __b;
> + } else {
> + /* Start of preprocessor calculations */
> +
> + /*
> + * Multiply by inverse of __b: *n/b = *n*(p/b)/p
> + * We rely on the fact that most of this code gets
> + * optimized away at compile time due to constant
> + * propagation and only a couple inline assembly
> + * instructions should remain. Better avoid any
> + * code construct that might prevent that.
> + */
> + unsigned long long __res, __x, __t, __m, __n = *n;
> + unsigned int __p;
> + /* preserve low part of *n for reminder computation */
s/reminder/remainder/
> + __r = __n;
> + /* determine number of bits to represent __b */
> + __p = 1 << __div64_fls(__b);
> + /* compute __m = ((__p << 64) + __b - 1) / __b */
> + __m = (~0ULL / __b) * __p;
> + __m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b;
> + /* compute __res = __m*(~0ULL/__b*__b-1)/(__p << 64) */
> + __x = ~0ULL / __b * __b - 1;
> + __res = (__m & 0xffffffff) * (__x & 0xffffffff);
> + __res >>= 32;
> + __res += (__m & 0xffffffff) * (__x >> 32);
> + __t = __res;
> + __res += (__x & 0xffffffff) * (__m >> 32);
> + __t = (__res < __t) ? (1ULL << 32) : 0;
> + __res = (__res >> 32) + __t;
> + __res += (__m >> 32) * (__x >> 32);
> + __res /= __p;
> + /* End of preprocessor calculations */
> +
> + /* Start of run-time calculations */
> + __res = (unsigned int)__m * (unsigned int)__n;
> + __res >>= 32;
> + __res += (unsigned int)__m * (__n >> 32);
> + __t = __res;
> + __res += (unsigned int)__n * (__m >> 32);
> + __t = (__res < __t) ? (1ULL << 32) : 0;
> + __res = (__res >> 32) + __t;
> + __res += (__m >> 32) * (__n >> 32);
> + __res /= __p;
> +
> + /*
> + * The reminder can be computed with 32-bit regs
s/reminder/remainder/
> + * only, and gcc is good at that.
> + */
> + {
> + unsigned int __res0 = __res;
> + unsigned int __b0 = __b;
> +
> + __r -= __res0 * __b0;
> }
> - b >>= 1;
> - d >>= 1;
> - } while (d);
> + /* End of run-time calculations */
>
> - *n = res;
> - return rem;
> + *n = __res;
> + }
> + return __r;
> }
>
> EXPORT_SYMBOL(__div64_32);
--
~Randy
prev parent reply other threads:[~2015-10-29 17:10 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-28 22:47 [PATCH] __div64_32: implement division by multiplication for 32-bit arches Alexey Brodkin
2015-10-28 23:32 ` Nicolas Pitre
2015-10-29 7:34 ` Alexey Brodkin
2015-10-30 1:26 ` Nicolas Pitre
2015-10-30 5:41 ` Vineet Gupta
2015-10-30 12:41 ` Måns Rullgård
2015-10-30 12:40 ` Måns Rullgård
2015-10-30 15:17 ` Nicolas Pitre
2015-10-30 15:54 ` Alexey Brodkin
2015-10-30 16:55 ` Nicolas Pitre
2015-10-30 17:45 ` Måns Rullgård
2015-11-04 23:46 ` Nicolas Pitre
2015-11-04 23:48 ` Nicolas Pitre
2015-11-05 3:13 ` Vineet Gupta
2015-11-05 5:06 ` Nicolas Pitre
2015-11-04 23:49 ` Måns Rullgård
2015-10-30 14:28 ` Alexey Brodkin
2015-10-29 0:36 ` kbuild test robot
2015-10-29 12:52 ` Måns Rullgård
2015-10-29 13:05 ` Alexey Brodkin
2015-10-29 13:37 ` Måns Rullgård
2015-10-29 13:31 ` Russell King - ARM Linux
2015-10-29 14:32 ` Alexey Brodkin
2015-10-29 17:09 ` Randy Dunlap [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=563252BE.9030506@infradead.org \
--to=rdunlap@infradead.org \
--cc=Alexey.Brodkin@synopsys.com \
--cc=Vineet.Gupta1@synopsys.com \
--cc=davem@davemloft.net \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-snps-arc@lists.infradead.org \
--cc=mingo@elte.hu \
--cc=nico@cam.org \
--cc=rmk+kernel@arm.linux.org.uk \
--cc=shemminger@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.