[PATCH 1/3] lib: Add mul_u64_add_u64_div_u64() and mul_u64_u64_div_u64_roundup()

All of lore.kernel.org
 help / color / mirror / Atom feed

From: David Laight <david.laight.linux@gmail.com>
To: Andrew Morton <akpm@linux-foundation.org>, linux-kernel@vger.kernel.org
Cc: "David Laight" <david.laight.linux@gmail.com>,
	"Uwe Kleine-König" <u.kleine-koenig@baylibre.com>,
	"Nicolas Pitre" <npitre@baylibre.com>,
	"Oleg Nesterov" <oleg@redhat.com>,
	"Peter Zijlstra" <peterz@infradead.org>,
	"Biju Das" <biju.das.jz@bp.renesas.com>
Subject: [PATCH 1/3] lib: Add mul_u64_add_u64_div_u64() and mul_u64_u64_div_u64_roundup()
Date: Sat,  5 Apr 2025 21:45:28 +0100	[thread overview]
Message-ID: <20250405204530.186242-2-david.laight.linux@gmail.com> (raw)
In-Reply-To: <20250405204530.186242-1-david.laight.linux@gmail.com>

The existing mul_u64_u64_div_u64() rounds down, a 'rounding up'
variant needs 'divisor - 1' adding in between the multiply and
divide so cannot easily be done by a caller.

Add mul_u64_add_u64_div_u64(a, b, c, d) that calculates (a * b + c)/d
and implement the 'round down' and 'round up' using it.

Update the x86-64 asm to optimise for 'c' being a constant zero.

For architectures that support u128 check for a 64bit product after
the multiply (will be cheap).
Leave in the early check for other architectures (mostly 32bit) when
'c' is zero to avoid the multi-part multiply.

Note that the cost of the 128bit divide will dwarf the rest of the code.
This function is very slow on everything except x86-64 (very very slow
on 32bit).

Add kerndoc definitions for all three functions.

Signed-off-by: David Laight <david.laight.linux@gmail.com>
---
 arch/x86/include/asm/div64.h | 19 +++++++++++-----
 include/linux/math64.h       | 44 +++++++++++++++++++++++++++++++++++-
 lib/math/div64.c             | 41 ++++++++++++++++++---------------
 3 files changed, 79 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9931e4c7d73f..9322a35f6a39 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -84,21 +84,28 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
  * Will generate an #DE when the result doesn't fit u64, could fix with an
  * __ex_table[] entry when it becomes an issue.
  */
-static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
+static inline u64 mul_u64_add_u64_div_u64(u64 a, u64 mul, u64 add, u64 div)
 {
 	u64 q;
 
-	asm ("mulq %2; divq %3" : "=a" (q)
-				: "a" (a), "rm" (mul), "rm" (div)
-				: "rdx");
+	if (statically_true(!add)) {
+		asm ("mulq %2; divq %3" : "=a" (q)
+					: "a" (a), "rm" (mul), "rm" (div)
+					: "rdx");
+	} else {
+		asm ("mulq %2; addq %4,%%rax; adcq $0,%%rdx; divq %3"
+			: "=a" (q)
+			: "a" (a), "rm" (mul), "rm" (div), "rm" (add)
+			: "rdx");
+	}
 
 	return q;
 }
-#define mul_u64_u64_div_u64 mul_u64_u64_div_u64
+#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64
 
 static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
 {
-	return mul_u64_u64_div_u64(a, mul, div);
+	return mul_u64_add_u64_div_u64(a, mul, 0, div);
 }
 #define mul_u64_u32_div	mul_u64_u32_div
 
diff --git a/include/linux/math64.h b/include/linux/math64.h
index 6aaccc1626ab..e958170e64ab 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -282,7 +282,49 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
 }
 #endif /* mul_u64_u32_div */
 
-u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
+/**
+ * mul_u64_add_u64_div_u64 - unsigned 64bit multiply, add, and divide
+ * @a: first unsigned 64bit multiplicand
+ * @b: second unsigned 64bit multiplicand
+ * @c: unsigned 64bit addend
+ * @d: unsigned 64bit divisor
+ *
+ * Multiply two 64bit values together to generate a 128bit product
+ * add a third value and then divide by a fourth.
+ * May BUG()/trap if @d is zero or the quotient exceeds 64 bits.
+ *
+ * Return: (@a * @b + @c) / @d
+ */
+u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d);
+
+/**
+ * mul_u64_u64_div_u64 - unsigned 64bit multiply and divide
+ * @a: first unsigned 64bit multiplicand
+ * @b: second unsigned 64bit multiplicand
+ * @d: unsigned 64bit divisor
+ *
+ * Multiply two 64bit values together to generate a 128bit product
+ * and then divide by a third value.
+ * May BUG()/trap if @d is zero or the quotient exceeds 64 bits.
+ *
+ * Return: @a * @b / @d
+ */
+#define mul_u64_u64_div_u64(a, b, d) mul_u64_add_u64_div_u64(a, b, 0, d)
+
+/**
+ * mul_u64_u64_div_u64_roundup - unsigned 64bit multiply and divide rounded up
+ * @a: first unsigned 64bit multiplicand
+ * @b: second unsigned 64bit multiplicand
+ * @d: unsigned 64bit divisor
+ *
+ * Multiply two 64bit values together to generate a 128bit product
+ * and then divide and round up.
+ * May BUG()/trap if @d is zero or the quotient exceeds 64 bits.
+ *
+ * Return: (@a * @b + @d - 1) / @d
+ */
+#define mul_u64_u64_div_u64_roundup(a, b, d) \
+	({ u64 _tmp = (d); mul_u64_add_u64_div_u64(a, b, _tmp - 1, _tmp); })
 
 /**
  * DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up
diff --git a/lib/math/div64.c b/lib/math/div64.c
index 5faa29208bdb..50e025174495 100644
--- a/lib/math/div64.c
+++ b/lib/math/div64.c
@@ -183,26 +183,28 @@ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
 }
 EXPORT_SYMBOL(iter_div_u64_rem);
 
-#ifndef mul_u64_u64_div_u64
-u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
+#if !defined(mul_u64_add_u64_div_u64)
+u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d)
 {
-	if (ilog2(a) + ilog2(b) <= 62)
-		return div64_u64(a * b, c);
-
 #if defined(__SIZEOF_INT128__)
 
 	/* native 64x64=128 bits multiplication */
-	u128 prod = (u128)a * b;
+	u128 prod = (u128)a * b + c;
 	u64 n_lo = prod, n_hi = prod >> 64;
 
 #else
 
+	if (!c && ilog2(a) + ilog2(b) <= 62)
+		return div64_u64(a * b, d);
+
 	/* perform a 64x64=128 bits multiplication manually */
 	u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
 	u64 x, y, z;
 
-	x = (u64)a_lo * b_lo;
+	/* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */
+	x = (u64)a_lo * b_lo + (u32)c;
 	y = (u64)a_lo * b_hi + (u32)(x >> 32);
+	y += (u32)(c >> 32);
 	z = (u64)a_hi * b_hi + (u32)(y >> 32);
 	y = (u64)a_hi * b_lo + (u32)y;
 	z += (u32)(y >> 32);
@@ -212,36 +214,39 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
 
 #endif
 
-	/* make sure c is not zero, trigger exception otherwise */
+	if (!n_hi)
+		return div64_u64(n_lo, d);
+
+	/* make sure d is not zero, trigger exception otherwise */
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wdiv-by-zero"
-	if (unlikely(c == 0))
+	if (unlikely(d == 0))
 		return 1/0;
 #pragma GCC diagnostic pop
 
-	int shift = __builtin_ctzll(c);
+	int shift = __builtin_ctzll(d);
 
 	/* try reducing the fraction in case the dividend becomes <= 64 bits */
 	if ((n_hi >> shift) == 0) {
 		u64 n = shift ? (n_lo >> shift) | (n_hi << (64 - shift)) : n_lo;
 
-		return div64_u64(n, c >> shift);
+		return div64_u64(n, d >> shift);
 		/*
 		 * The remainder value if needed would be:
-		 *   res = div64_u64_rem(n, c >> shift, &rem);
+		 *   res = div64_u64_rem(n, d >> shift, &rem);
 		 *   rem = (rem << shift) + (n_lo - (n << shift));
 		 */
 	}
 
-	if (n_hi >= c) {
+	if (n_hi >= d) {
 		/* overflow: result is unrepresentable in a u64 */
 		return -1;
 	}
 
 	/* Do the full 128 by 64 bits division */
 
-	shift = __builtin_clzll(c);
-	c <<= shift;
+	shift = __builtin_clzll(d);
+	d <<= shift;
 
 	int p = 64 + shift;
 	u64 res = 0;
@@ -256,8 +261,8 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
 		n_hi <<= shift;
 		n_hi |= n_lo >> (64 - shift);
 		n_lo <<= shift;
-		if (carry || (n_hi >= c)) {
-			n_hi -= c;
+		if (carry || (n_hi >= d)) {
+			n_hi -= d;
 			res |= 1ULL << p;
 		}
 	} while (n_hi);
@@ -265,5 +270,5 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
 
 	return res;
 }
-EXPORT_SYMBOL(mul_u64_u64_div_u64);
+EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
 #endif
-- 
2.39.5

next prev parent reply	other threads:[~2025-04-05 20:45 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-05 20:45 [PATCH next 0/3] lib: Implement mul_u64_u64_div_u64_roundup() David Laight
2025-04-05 20:45 ` David Laight [this message]
2025-04-06  1:46   ` [PATCH 1/3] lib: Add mul_u64_add_u64_div_u64() and mul_u64_u64_div_u64_roundup() Nicolas Pitre
2025-04-06  3:06     ` Nicolas Pitre
2025-04-06  9:35     ` David Laight
2025-04-06 12:30       ` David Laight
2025-04-05 20:45 ` [PATCH 2/3] lib: Add tests for mul_u64_u64_div_u64_roundup() David Laight
2025-04-06  1:47   ` Nicolas Pitre
2025-04-05 20:45 ` [PATCH 3/3] lib: Update the muldiv64 tests to verify the C on x86-64 David Laight
2025-04-06  2:26   ` Nicolas Pitre
2025-05-16  9:47 ` [PATCH next 0/3] lib: Implement mul_u64_u64_div_u64_roundup() Uwe Kleine-König
2025-05-16 12:17   ` David Laight
2025-05-16 15:49     ` Nicolas Pitre
2025-05-18 13:43       ` David Laight

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:9931e4c7d73 dfblob:9322a35f6a3 dfblob:6aaccc1626a
dfblob:e958170e64a dfblob:5faa29208bd dfblob:50e02517449 )
 OR (
bs:"[PATCH 1/3] lib: Add mul_u64_add_u64_div_u64() and mul_u64_u64_div_u64_roundup()" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250405204530.186242-2-david.laight.linux@gmail.com \
    --to=david.laight.linux@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=biju.das.jz@bp.renesas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=npitre@baylibre.com \
    --cc=oleg@redhat.com \
    --cc=peterz@infradead.org \
    --cc=u.kleine-koenig@baylibre.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.