Re: [PATCH] math64: Provide an uprounding variant of mul_u64_u64_div_u64()

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: David Laight <david.laight.linux@gmail.com>
To: "Uwe Kleine-König" <u.kleine-koenig@baylibre.com>
Cc: Nicolas Pitre <nico@fluxnic.net>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH] math64: Provide an uprounding variant of mul_u64_u64_div_u64()
Date: Wed, 2 Apr 2025 22:46:52 +0100	[thread overview]
Message-ID: <20250402224652.1bb38f6b@pumpkin> (raw)
In-Reply-To: <gqqxuoz5jfrlsmrxdhwevfo7kflxjqhbkfy2ksnsdcadbk52hd@yaitrauy52xg>

On Wed, 2 Apr 2025 17:01:49 +0200
Uwe Kleine-König <u.kleine-koenig@baylibre.com> wrote:

How about the following (the patch is tab-damaged):

Compile tested only, on x86-64 (once with the local definitions removed).

Looking at the object code, if u128 is supported then checking n_hi after
the multiply is always going to be better than the ilog2() pre-check.
Remember, multiply is cheap.

	David

diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9931e4c7d73f..6115f3fcb975 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -84,21 +84,28 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
  * Will generate an #DE when the result doesn't fit u64, could fix with an
  * __ex_table[] entry when it becomes an issue.
  */
-static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
+static inline u64 mul_u64_add_u64_div_u64(u64 a, u64 mul, u64 add, u64 div)
 {
        u64 q;

-       asm ("mulq %2; divq %3" : "=a" (q)
-                               : "a" (a), "rm" (mul), "rm" (div)
-                               : "rdx");
+       if (statically_true(!add)) {
+               asm ("mulq %2; divq %3" : "=a" (q)
+                                       : "a" (a), "rm" (mul), "rm" (div)
+                                       : "rdx");
+       } else {
+               asm ("mulq %2; addq %4,%rax; addc $0,%rdx; divq %3"
+                       : "=a" (q)
+                       : "a" (a), "rm" (mul), "rm" (div), "rm" (add)
+                       : "rdx");
+       }

        return q;
 }
-#define mul_u64_u64_div_u64 mul_u64_u64_div_u64
+#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64

 static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
 {
-       return mul_u64_u64_div_u64(a, mul, div);
+       return mul_u64_add_u64_div_u64(a, mul, 0, div);
 }
 #define mul_u64_u32_div        mul_u64_u32_div

diff --git a/include/linux/math64.h b/include/linux/math64.h
index 6aaccc1626ab..1544dc37e317 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -282,7 +282,10 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
 }
 #endif /* mul_u64_u32_div */

-u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
+u64 mul_u64_add_u64_div_u64(u64 a, u64 mul, u64 add, u64 div);
+#define mul_u64_u64_div_u64(a, mul, div) mul_u64_add_u64_div_u64(a, mul, 0, div)
+#define mul_u64_u64_div_u64_roundup(a, mul, div) \
+       ({ u64 _tmp = (div); mul_u64_add_u64_div_u64(a, mul, _tmp - 1, _tmp); })

 /**
  * DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up
diff --git a/lib/math/div64.c b/lib/math/div64.c
index 5faa29208bdb..efcc8d729c74 100644
--- a/lib/math/div64.c
+++ b/lib/math/div64.c
@@ -183,16 +183,13 @@ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
 }
 EXPORT_SYMBOL(iter_div_u64_rem);

-#ifndef mul_u64_u64_div_u64
-u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
+#ifndef mul_u64_add_u64_div_u64
+u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 add, u64 c)
 {
-       if (ilog2(a) + ilog2(b) <= 62)
-               return div64_u64(a * b, c);
-
 #if defined(__SIZEOF_INT128__)

        /* native 64x64=128 bits multiplication */
-       u128 prod = (u128)a * b;
+       u128 prod = (u128)a * b + add;
        u64 n_lo = prod, n_hi = prod >> 64;

 #else
@@ -201,6 +198,11 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
        u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
        u64 x, y, z;

+#if BITS_PER_LONG == 32
+       if (!(a_hi | b_hi))
+               return div64_u64(a_lo * b_lo + add, c);
+#endif
+
        x = (u64)a_lo * b_lo;
        y = (u64)a_lo * b_hi + (u32)(x >> 32);
        z = (u64)a_hi * b_hi + (u32)(y >> 32);
@@ -208,10 +210,13 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
        z += (u32)(y >> 32);
        x = (y << 32) + (u32)x;

-       u64 n_lo = x, n_hi = z;
+       u64 n_lo = x + add, n_hi = z + (n_lo < x);

 #endif

+       if (!n_hi)
+               return div64_u64(n_lo, c);
+
        /* make sure c is not zero, trigger exception otherwise */
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdiv-by-zero"
@@ -265,5 +270,5 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)

        return res;
 }
-EXPORT_SYMBOL(mul_u64_u64_div_u64);
+EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
 #endif

     prev parent reply	other threads:[~2025-04-02 21:46 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-19 17:14 [PATCH] math64: Provide an uprounding variant of mul_u64_u64_div_u64() Uwe Kleine-König
2025-03-19 19:38 ` Nicolas Pitre
2025-03-20  7:36   ` Uwe Kleine-König
2025-03-21 13:18 ` David Laight
2025-03-31 16:14   ` Uwe Kleine-König
2025-03-31 18:53     ` David Laight
2025-04-01  7:25       ` Uwe Kleine-König
2025-04-01 19:26         ` David Laight
2025-04-01 20:13           ` Nicolas Pitre
2025-04-01 20:30             ` Nicolas Pitre
2025-04-01 21:37               ` David Laight
2025-04-01 22:10                 ` Nicolas Pitre
2025-04-02  8:16                 ` Uwe Kleine-König
2025-04-02 12:52                   ` David Laight
2025-04-02 15:01                     ` Uwe Kleine-König
2025-04-02 20:59                       ` David Laight
2025-04-03  6:08                         ` Uwe Kleine-König
2025-04-02 21:46                       ` David Laight [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:9931e4c7d73 dfblob:6115f3fcb97 dfblob:6aaccc1626a
dfblob:1544dc37e31 dfblob:5faa29208bd dfblob:efcc8d729c7 )
 OR (
bs:"Re: [PATCH] math64: Provide an uprounding variant of mul_u64_u64_div_u64()" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250402224652.1bb38f6b@pumpkin \
    --to=david.laight.linux@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nico@fluxnic.net \
    --cc=u.kleine-koenig@baylibre.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox