From: David Laight <david.laight.linux@gmail.com>
To: "Uwe Kleine-König" <u.kleine-koenig@baylibre.com>
Cc: Nicolas Pitre <nico@fluxnic.net>,
Andrew Morton <akpm@linux-foundation.org>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH] math64: Provide an uprounding variant of mul_u64_u64_div_u64()
Date: Wed, 2 Apr 2025 22:46:52 +0100 [thread overview]
Message-ID: <20250402224652.1bb38f6b@pumpkin> (raw)
In-Reply-To: <gqqxuoz5jfrlsmrxdhwevfo7kflxjqhbkfy2ksnsdcadbk52hd@yaitrauy52xg>
On Wed, 2 Apr 2025 17:01:49 +0200
Uwe Kleine-König <u.kleine-koenig@baylibre.com> wrote:
How about (tab damaged):
Compile tested only, on x86-64 (once with the local definitions removed).
Looking at the object code, if u128 is supported then checking n_hi
is always going to be better than a pre-check.
Remember multiply is cheap.
David
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9931e4c7d73f..6115f3fcb975 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -84,21 +84,28 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
* Will generate an #DE when the result doesn't fit u64, could fix with an
* __ex_table[] entry when it becomes an issue.
*/
-static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
+static inline u64 mul_u64_add_u64_div_u64(u64 a, u64 mul, u64 add, u64 div)
{
u64 q;
- asm ("mulq %2; divq %3" : "=a" (q)
- : "a" (a), "rm" (mul), "rm" (div)
- : "rdx");
+ if (statically_true(!add)) {
+ asm ("mulq %2; divq %3" : "=a" (q)
+ : "a" (a), "rm" (mul), "rm" (div)
+ : "rdx");
+ } else {
+ asm ("mulq %2; addq %4,%rax; addc $0,%rdx; divq %3"
+ : "=a" (q)
+ : "a" (a), "rm" (mul), "rm" (div), "rm" (add)
+ : "rdx");
+ }
return q;
}
-#define mul_u64_u64_div_u64 mul_u64_u64_div_u64
+#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64
static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
{
- return mul_u64_u64_div_u64(a, mul, div);
+ return mul_u64_add_u64_div_u64(a, mul, 0, div);
}
#define mul_u64_u32_div mul_u64_u32_div
diff --git a/include/linux/math64.h b/include/linux/math64.h
index 6aaccc1626ab..1544dc37e317 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -282,7 +282,10 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
}
#endif /* mul_u64_u32_div */
-u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
+u64 mul_u64_add_u64_div_u64(u64 a, u64 mul, u64 add, u64 div);
+#define mul_u64_u64_div_u64(a, mul, div) mul_u64_add_u64_div_u64(a, mul, 0, div)
+#define mul_u64_u64_div_u64_roundup(a, mul, div) \
+ ({ u64 _tmp = (div); mul_u64_add_u64_div_u64(a, mul, _tmp - 1, _tmp); })
/**
* DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up
diff --git a/lib/math/div64.c b/lib/math/div64.c
index 5faa29208bdb..efcc8d729c74 100644
--- a/lib/math/div64.c
+++ b/lib/math/div64.c
@@ -183,16 +183,13 @@ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
}
EXPORT_SYMBOL(iter_div_u64_rem);
-#ifndef mul_u64_u64_div_u64
-u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
+#ifndef mul_u64_add_u64_div_u64
+u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 add, u64 c)
{
- if (ilog2(a) + ilog2(b) <= 62)
- return div64_u64(a * b, c);
-
#if defined(__SIZEOF_INT128__)
/* native 64x64=128 bits multiplication */
- u128 prod = (u128)a * b;
+ u128 prod = (u128)a * b + add;
u64 n_lo = prod, n_hi = prod >> 64;
#else
@@ -201,6 +198,11 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
u64 x, y, z;
+#if BITS_PER_LONG == 32
+ if (!(a_hi | b_hi))
+ return div64_u64(a_lo * b_lo + add, c);
+#endif
+
x = (u64)a_lo * b_lo;
y = (u64)a_lo * b_hi + (u32)(x >> 32);
z = (u64)a_hi * b_hi + (u32)(y >> 32);
@@ -208,10 +210,13 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
z += (u32)(y >> 32);
x = (y << 32) + (u32)x;
- u64 n_lo = x, n_hi = z;
+ u64 n_lo = x + add, n_hi = z + (n_lo < x);
#endif
+ if (!n_hi)
+ return div64_u64(n_lo, c);
+
/* make sure c is not zero, trigger exception otherwise */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdiv-by-zero"
@@ -265,5 +270,5 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
return res;
}
-EXPORT_SYMBOL(mul_u64_u64_div_u64);
+EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
#endif
prev parent reply other threads:[~2025-04-02 21:46 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-19 17:14 [PATCH] math64: Provide an uprounding variant of mul_u64_u64_div_u64() Uwe Kleine-König
2025-03-19 19:38 ` Nicolas Pitre
2025-03-20 7:36 ` Uwe Kleine-König
2025-03-21 13:18 ` David Laight
2025-03-31 16:14 ` Uwe Kleine-König
2025-03-31 18:53 ` David Laight
2025-04-01 7:25 ` Uwe Kleine-König
2025-04-01 19:26 ` David Laight
2025-04-01 20:13 ` Nicolas Pitre
2025-04-01 20:30 ` Nicolas Pitre
2025-04-01 21:37 ` David Laight
2025-04-01 22:10 ` Nicolas Pitre
2025-04-02 8:16 ` Uwe Kleine-König
2025-04-02 12:52 ` David Laight
2025-04-02 15:01 ` Uwe Kleine-König
2025-04-02 20:59 ` David Laight
2025-04-03 6:08 ` Uwe Kleine-König
2025-04-02 21:46 ` David Laight [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250402224652.1bb38f6b@pumpkin \
--to=david.laight.linux@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=nico@fluxnic.net \
--cc=u.kleine-koenig@baylibre.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.