Re: [PATCH] math64: Provide an uprounding variant of mul_u64_u64_div_u64()

All of lore.kernel.org
 help / color / mirror / Atom feed

From: David Laight <david.laight.linux@gmail.com>
To: "Uwe Kleine-König" <u.kleine-koenig@baylibre.com>
Cc: Nicolas Pitre <nico@fluxnic.net>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH] math64: Provide an uprounding variant of mul_u64_u64_div_u64()
Date: Wed, 2 Apr 2025 22:46:52 +0100	[thread overview]
Message-ID: <20250402224652.1bb38f6b@pumpkin> (raw)
In-Reply-To: <gqqxuoz5jfrlsmrxdhwevfo7kflxjqhbkfy2ksnsdcadbk52hd@yaitrauy52xg>

On Wed, 2 Apr 2025 17:01:49 +0200
Uwe Kleine-König <u.kleine-koenig@baylibre.com> wrote:

How about (tab damaged):

Compile tested only, on x86-64 (once with the local definitions removed).

Looking at the object code, if u128 is supported then checking n_hi
is always going to be better than a pre-check.
Remember multiply is cheap.

	David

diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9931e4c7d73f..6115f3fcb975 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -84,21 +84,29 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
  * Will generate an #DE when the result doesn't fit u64, could fix with an
  * __ex_table[] entry when it becomes an issue.
  */
-static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
+static inline u64 mul_u64_add_u64_div_u64(u64 a, u64 mul, u64 add, u64 div)
 {
        u64 q;

-       asm ("mulq %2; divq %3" : "=a" (q)
-                               : "a" (a), "rm" (mul), "rm" (div)
-                               : "rdx");
+       if (statically_true(!add)) {
+               asm ("mulq %2; divq %3" : "=a" (q)
+                                       : "a" (a), "rm" (mul), "rm" (div)
+                                       : "rdx");
+       } else {
+               /* 128-bit add of 'add' into the rdx:rax product before dividing */
+               asm ("mulq %2; addq %4,%%rax; adcq $0,%%rdx; divq %3"
+                       : "=a" (q)
+                       : "a" (a), "rm" (mul), "rm" (div), "rm" (add)
+                       : "rdx");
+       }

        return q;
 }
-#define mul_u64_u64_div_u64 mul_u64_u64_div_u64
+#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64

 static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
 {
-       return mul_u64_u64_div_u64(a, mul, div);
+       return mul_u64_add_u64_div_u64(a, mul, 0, div);
 }
 #define mul_u64_u32_div        mul_u64_u32_div

diff --git a/include/linux/math64.h b/include/linux/math64.h
index 6aaccc1626ab..1544dc37e317 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -282,7 +282,10 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
 }
 #endif /* mul_u64_u32_div */

-u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
+u64 mul_u64_add_u64_div_u64(u64 a, u64 mul, u64 add, u64 div);
+#define mul_u64_u64_div_u64(a, mul, div) mul_u64_add_u64_div_u64(a, mul, 0, div)
+#define mul_u64_u64_div_u64_roundup(a, mul, div) \
+       ({ u64 _tmp = (div); mul_u64_add_u64_div_u64(a, mul, _tmp - 1, _tmp); })

 /**
  * DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up
diff --git a/lib/math/div64.c b/lib/math/div64.c
index 5faa29208bdb..efcc8d729c74 100644
--- a/lib/math/div64.c
+++ b/lib/math/div64.c
@@ -183,16 +183,13 @@ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
 }
 EXPORT_SYMBOL(iter_div_u64_rem);

-#ifndef mul_u64_u64_div_u64
-u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
+#ifndef mul_u64_add_u64_div_u64
+u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 add, u64 c)
 {
-       if (ilog2(a) + ilog2(b) <= 62)
-               return div64_u64(a * b, c);
-
 #if defined(__SIZEOF_INT128__)

        /* native 64x64=128 bits multiplication */
-       u128 prod = (u128)a * b;
+       u128 prod = (u128)a * b + add;
        u64 n_lo = prod, n_hi = prod >> 64;

 #else
@@ -201,6 +198,11 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
        u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
        u64 x, y, z;

+#if BITS_PER_LONG == 32
+       if (!(a_hi | b_hi))
+               return div64_u64(a_lo * b_lo + add, c);
+#endif
+
        x = (u64)a_lo * b_lo;
        y = (u64)a_lo * b_hi + (u32)(x >> 32);
        z = (u64)a_hi * b_hi + (u32)(y >> 32);
@@ -208,10 +210,13 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
        z += (u32)(y >> 32);
        x = (y << 32) + (u32)x;

-       u64 n_lo = x, n_hi = z;
+       u64 n_lo = x + add, n_hi = z + (n_lo < x);

 #endif

+       if (!n_hi)
+               return div64_u64(n_lo, c);
+
        /* make sure c is not zero, trigger exception otherwise */
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdiv-by-zero"
@@ -265,5 +270,5 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)

        return res;
 }
-EXPORT_SYMBOL(mul_u64_u64_div_u64);
+EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
 #endif

     prev parent reply	other threads:[~2025-04-02 21:46 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-19 17:14 [PATCH] math64: Provide an uprounding variant of mul_u64_u64_div_u64() Uwe Kleine-König
2025-03-19 19:38 ` Nicolas Pitre
2025-03-20  7:36   ` Uwe Kleine-König
2025-03-21 13:18 ` David Laight
2025-03-31 16:14   ` Uwe Kleine-König
2025-03-31 18:53     ` David Laight
2025-04-01  7:25       ` Uwe Kleine-König
2025-04-01 19:26         ` David Laight
2025-04-01 20:13           ` Nicolas Pitre
2025-04-01 20:30             ` Nicolas Pitre
2025-04-01 21:37               ` David Laight
2025-04-01 22:10                 ` Nicolas Pitre
2025-04-02  8:16                 ` Uwe Kleine-König
2025-04-02 12:52                   ` David Laight
2025-04-02 15:01                     ` Uwe Kleine-König
2025-04-02 20:59                       ` David Laight
2025-04-03  6:08                         ` Uwe Kleine-König
2025-04-02 21:46                       ` David Laight [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:9931e4c7d73 dfblob:6115f3fcb97 dfblob:6aaccc1626a
dfblob:1544dc37e31 dfblob:5faa29208bd dfblob:efcc8d729c7 )
 OR (
bs:"Re: [PATCH] math64: Provide an uprounding variant of mul_u64_u64_div_u64()" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250402224652.1bb38f6b@pumpkin \
    --to=david.laight.linux@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nico@fluxnic.net \
    --cc=u.kleine-koenig@baylibre.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.