* [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde
2015-11-02 22:33 ` [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde Nicolas Pitre
@ 2015-11-02 22:33 ` Nicolas Pitre
2015-11-03 1:25 ` kbuild test robot
` (2 subsequent siblings)
3 siblings, 0 replies; 22+ messages in thread
From: Nicolas Pitre @ 2015-11-02 22:33 UTC (permalink / raw)
To: Alexey Brodkin, Måns Rullgård
Cc: Arnd Bergmann, rmk+kernel, linux-arch, linux-kernel
Now that the constant divisor optimization is made generic, adapt the
ARM case to it.
Signed-off-by: Nicolas Pitre <nico@linaro.org>
---
arch/arm/include/asm/div64.h | 283 ++++++++++++++-----------------------------
1 file changed, 93 insertions(+), 190 deletions(-)
diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h
index 662c7bd061..626bbb3671 100644
--- a/arch/arm/include/asm/div64.h
+++ b/arch/arm/include/asm/div64.h
@@ -5,9 +5,9 @@
#include <asm/compiler.h>
/*
- * The semantics of do_div() are:
+ * The semantics of __div64_32() are:
*
- * uint32_t do_div(uint64_t *n, uint32_t base)
+ * uint32_t __div64_32(uint64_t *n, uint32_t base)
* {
* uint32_t remainder = *n % base;
* *n = *n / base;
@@ -16,8 +16,9 @@
*
* In other words, a 64-bit dividend with a 32-bit divisor producing
* a 64-bit result and a 32-bit remainder. To accomplish this optimally
- * we call a special __do_div64 helper with completely non standard
- * calling convention for arguments and results (beware).
+ * we override the generic version in lib/div64.c to call our __do_div64
+ * assembly implementation with completely non standard calling convention
+ * for arguments and results (beware).
*/
#ifdef __ARMEB__
@@ -28,199 +29,101 @@
#define __xh "r1"
#endif
-#define __do_div_asm(n, base) \
-({ \
- register unsigned int __base asm("r4") = base; \
- register unsigned long long __n asm("r0") = n; \
- register unsigned long long __res asm("r2"); \
- register unsigned int __rem asm(__xh); \
- asm( __asmeq("%0", __xh) \
- __asmeq("%1", "r2") \
- __asmeq("%2", "r0") \
- __asmeq("%3", "r4") \
- "bl __do_div64" \
- : "=r" (__rem), "=r" (__res) \
- : "r" (__n), "r" (__base) \
- : "ip", "lr", "cc"); \
- n = __res; \
- __rem; \
-})
-
-#if __GNUC__ < 4 || !defined(CONFIG_AEABI)
+static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
+{
+ register unsigned int __base asm("r4") = base;
+ register unsigned long long __n asm("r0") = *n;
+ register unsigned long long __res asm("r2");
+ register unsigned int __rem asm(__xh);
+ asm( __asmeq("%0", __xh)
+ __asmeq("%1", "r2")
+ __asmeq("%2", "r0")
+ __asmeq("%3", "r4")
+ "bl __do_div64"
+ : "=r" (__rem), "=r" (__res)
+ : "r" (__n), "r" (__base)
+ : "ip", "lr", "cc");
+ *n = __res;
+ return __rem;
+}
+#define __div64_32 __div64_32
+
+#if !defined(CONFIG_AEABI)
/*
- * gcc versions earlier than 4.0 are simply too problematic for the
- * optimized implementation below. First there is gcc PR 15089 that
- * tend to trig on more complex constructs, spurious .global __udivsi3
- * are inserted even if none of those symbols are referenced in the
- * generated code, and those gcc versions are not able to do constant
- * propagation on long long values anyway.
+ * In OABI configurations, some uses of the do_div function
+ * cause gcc to run out of registers. To work around that,
+ * we can force the use of the out-of-line version for
+ * configurations that build a OABI kernel.
*/
-#define do_div(n, base) __do_div_asm(n, base)
-
-#elif __GNUC__ >= 4
+#define do_div(n, base) __div64_32(&(n), base)
-#include <asm/bug.h>
+#else
/*
- * If the divisor happens to be constant, we determine the appropriate
- * inverse at compile time to turn the division into a few inline
- * multiplications instead which is much faster. And yet only if compiling
- * for ARMv4 or higher (we need umull/umlal) and if the gcc version is
- * sufficiently recent to perform proper long long constant propagation.
- * (It is unfortunate that gcc doesn't perform all this internally.)
+ * gcc versions earlier than 4.0 are simply too problematic for the
+ * __div64_const32() code in asm-generic/div64.h. First there is
+ * gcc PR 15089 that tend to trig on more complex constructs, spurious
+ * .global __udivsi3 are inserted even if none of those symbols are
+ * referenced in the generated code, and those gcc versions are not able
+ * to do constant propagation on long long values anyway.
*/
-#define do_div(n, base) \
-({ \
- unsigned int __r, __b = (base); \
- if (!__builtin_constant_p(__b) || __b == 0 || \
- (__LINUX_ARM_ARCH__ < 4 && (__b & (__b - 1)) != 0)) { \
- /* non-constant divisor (or zero): slow path */ \
- __r = __do_div_asm(n, __b); \
- } else if ((__b & (__b - 1)) == 0) { \
- /* Trivial: __b is constant and a power of 2 */ \
- /* gcc does the right thing with this code. */ \
- __r = n; \
- __r &= (__b - 1); \
- n /= __b; \
- } else { \
- /* Multiply by inverse of __b: n/b = n*(p/b)/p */ \
- /* We rely on the fact that most of this code gets */ \
- /* optimized away at compile time due to constant */ \
- /* propagation and only a couple inline assembly */ \
- /* instructions should remain. Better avoid any */ \
- /* code construct that might prevent that. */ \
- unsigned long long __res, __x, __t, __m, __n = n; \
- unsigned int __c, __p, __z = 0; \
- /* preserve low part of n for reminder computation */ \
- __r = __n; \
- /* determine number of bits to represent __b */ \
- __p = 1 << __div64_fls(__b); \
- /* compute __m = ((__p << 64) + __b - 1) / __b */ \
- __m = (~0ULL / __b) * __p; \
- __m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b; \
- /* compute __res = __m*(~0ULL/__b*__b-1)/(__p << 64) */ \
- __x = ~0ULL / __b * __b - 1; \
- __res = (__m & 0xffffffff) * (__x & 0xffffffff); \
- __res >>= 32; \
- __res += (__m & 0xffffffff) * (__x >> 32); \
- __t = __res; \
- __res += (__x & 0xffffffff) * (__m >> 32); \
- __t = (__res < __t) ? (1ULL << 32) : 0; \
- __res = (__res >> 32) + __t; \
- __res += (__m >> 32) * (__x >> 32); \
- __res /= __p; \
- /* Now sanitize and optimize what we've got. */ \
- if (~0ULL % (__b / (__b & -__b)) == 0) { \
- /* those cases can be simplified with: */ \
- __n /= (__b & -__b); \
- __m = ~0ULL / (__b / (__b & -__b)); \
- __p = 1; \
- __c = 1; \
- } else if (__res != __x / __b) { \
- /* We can't get away without a correction */ \
- /* to compensate for bit truncation errors. */ \
- /* To avoid it we'd need an additional bit */ \
- /* to represent __m which would overflow it. */ \
- /* Instead we do m=p/b and n/b=(n*m+m)/p. */ \
- __c = 1; \
- /* Compute __m = (__p << 64) / __b */ \
- __m = (~0ULL / __b) * __p; \
- __m += ((~0ULL % __b + 1) * __p) / __b; \
- } else { \
- /* Reduce __m/__p, and try to clear bit 31 */ \
- /* of __m when possible otherwise that'll */ \
- /* need extra overflow handling later. */ \
- unsigned int __bits = -(__m & -__m); \
- __bits |= __m >> 32; \
- __bits = (~__bits) << 1; \
- /* If __bits == 0 then setting bit 31 is */ \
- /* unavoidable. Simply apply the maximum */ \
- /* possible reduction in that case. */ \
- /* Otherwise the MSB of __bits indicates the */ \
- /* best reduction we should apply. */ \
- if (!__bits) { \
- __p /= (__m & -__m); \
- __m /= (__m & -__m); \
- } else { \
- __p >>= __div64_fls(__bits); \
- __m >>= __div64_fls(__bits); \
- } \
- /* No correction needed. */ \
- __c = 0; \
- } \
- /* Now we have a combination of 2 conditions: */ \
- /* 1) whether or not we need a correction (__c), and */ \
- /* 2) whether or not there might be an overflow in */ \
- /* the cross product (__m & ((1<<63) | (1<<31))) */ \
- /* Select the best insn combination to perform the */ \
- /* actual __m * __n / (__p << 64) operation. */ \
- if (!__c) { \
- asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \
- "mov %Q0, #0" \
- : "=&r" (__res) \
- : "r" (__m), "r" (__n) \
- : "cc" ); \
- } else if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \
- __res = __m; \
- asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t" \
- "mov %Q0, #0" \
- : "+&r" (__res) \
- : "r" (__m), "r" (__n) \
- : "cc" ); \
- } else { \
- asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \
- "cmn %Q0, %Q1\n\t" \
- "adcs %R0, %R0, %R1\n\t" \
- "adc %Q0, %3, #0" \
- : "=&r" (__res) \
- : "r" (__m), "r" (__n), "r" (__z) \
- : "cc" ); \
- } \
- if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \
- asm ( "umlal %R0, %Q0, %R1, %Q2\n\t" \
- "umlal %R0, %Q0, %Q1, %R2\n\t" \
- "mov %R0, #0\n\t" \
- "umlal %Q0, %R0, %R1, %R2" \
- : "+&r" (__res) \
- : "r" (__m), "r" (__n) \
- : "cc" ); \
- } else { \
- asm ( "umlal %R0, %Q0, %R2, %Q3\n\t" \
- "umlal %R0, %1, %Q2, %R3\n\t" \
- "mov %R0, #0\n\t" \
- "adds %Q0, %1, %Q0\n\t" \
- "adc %R0, %R0, #0\n\t" \
- "umlal %Q0, %R0, %R2, %R3" \
- : "+&r" (__res), "+&r" (__z) \
- : "r" (__m), "r" (__n) \
- : "cc" ); \
- } \
- __res /= __p; \
- /* The reminder can be computed with 32-bit regs */ \
- /* only, and gcc is good at that. */ \
- { \
- unsigned int __res0 = __res; \
- unsigned int __b0 = __b; \
- __r -= __res0 * __b0; \
- } \
- /* BUG_ON(__r >= __b || __res * __b + __r != n); */ \
- n = __res; \
- } \
- __r; \
-})
-
-/* our own fls implementation to make sure constant propagation is fine */
-#define __div64_fls(bits) \
-({ \
- unsigned int __left = (bits), __nr = 0; \
- if (__left & 0xffff0000) __nr += 16, __left >>= 16; \
- if (__left & 0x0000ff00) __nr += 8, __left >>= 8; \
- if (__left & 0x000000f0) __nr += 4, __left >>= 4; \
- if (__left & 0x0000000c) __nr += 2, __left >>= 2; \
- if (__left & 0x00000002) __nr += 1; \
- __nr; \
-})
+
+#define __div64_const32_is_OK (__GNUC__ >= 4)
+
+static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias)
+{
+ unsigned long long res;
+ unsigned int tmp = 0;
+
+ if (!bias) {
+ asm ( "umull %Q0, %R0, %Q1, %Q2\n\t"
+ "mov %Q0, #0"
+ : "=&r" (res)
+ : "r" (m), "r" (n)
+ : "cc");
+ } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
+ res = m;
+ asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t"
+ "mov %Q0, #0"
+ : "+&r" (res)
+ : "r" (m), "r" (n)
+ : "cc");
+ } else {
+ asm ( "umull %Q0, %R0, %Q2, %Q3\n\t"
+ "cmn %Q0, %Q2\n\t"
+ "adcs %R0, %R0, %R2\n\t"
+ "adc %Q0, %1, #0"
+ : "=&r" (res), "+&r" (tmp)
+ : "r" (m), "r" (n)
+ : "cc");
+ }
+
+ if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
+ asm ( "umlal %R0, %Q0, %R1, %Q2\n\t"
+ "umlal %R0, %Q0, %Q1, %R2\n\t"
+ "mov %R0, #0\n\t"
+ "umlal %Q0, %R0, %R1, %R2"
+ : "+&r" (res)
+ : "r" (m), "r" (n)
+ : "cc");
+ } else {
+ asm ( "umlal %R0, %Q0, %R2, %Q3\n\t"
+ "umlal %R0, %1, %Q2, %R3\n\t"
+ "mov %R0, #0\n\t"
+ "adds %Q0, %1, %Q0\n\t"
+ "adc %R0, %R0, #0\n\t"
+ "umlal %Q0, %R0, %R2, %R3"
+ : "+&r" (res), "+&r" (tmp)
+ : "r" (m), "r" (n)
+ : "cc");
+ }
+
+ return res;
+}
+#define __arch_xprod_64 __arch_xprod_64
+
+#include <asm-generic/div64.h>
#endif
--
2.4.3
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde
2015-11-02 22:33 ` [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde Nicolas Pitre
2015-11-02 22:33 ` Nicolas Pitre
@ 2015-11-03 1:25 ` kbuild test robot
2015-11-03 4:03 ` Nicolas Pitre
2015-11-03 21:39 ` kbuild test robot
2015-11-19 16:36 ` Måns Rullgård
3 siblings, 1 reply; 22+ messages in thread
From: kbuild test robot @ 2015-11-03 1:25 UTC (permalink / raw)
To: Nicolas Pitre
Cc: kbuild-all, Alexey Brodkin, Måns Rullgård,
Arnd Bergmann, rmk+kernel, linux-arch, linux-kernel
[-- Attachment #1: Type: text/plain, Size: 20753 bytes --]
Hi Nicolas,
[auto build test WARNING on asm-generic/master -- if it's inappropriate base, please suggest rules for selecting the more suitable base]
url: https://github.com/0day-ci/linux/commits/Nicolas-Pitre/div64-h-optimize-do_div-for-power-of-two-constant-divisors/20151103-065348
config: arm-multi_v7_defconfig (attached as .config)
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm
All warnings (new ones prefixed by >>):
In file included from arch/arm/include/asm/div64.h:126:0,
from include/linux/kernel.h:136,
from include/asm-generic/bug.h:13,
from arch/arm/include/asm/bug.h:62,
from include/linux/bug.h:4,
from include/linux/io.h:23,
from include/linux/clk-provider.h:14,
from drivers/clk/imx/clk-pllv1.c:1:
drivers/clk/imx/clk-pllv1.c: In function 'clk_pllv1_recalc_rate':
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
>> drivers/clk/imx/clk-pllv1.c:99:2: note: in expansion of macro 'do_div'
do_div(ll, mfd + 1);
^
--
In file included from arch/arm/include/asm/div64.h:126:0,
from include/linux/kernel.h:136,
from drivers/clk/imx/clk-pllv2.c:1:
drivers/clk/imx/clk-pllv2.c: In function '__clk_pllv2_recalc_rate':
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
>> drivers/clk/imx/clk-pllv2.c:103:2: note: in expansion of macro 'do_div'
do_div(temp, mfd + 1);
^
drivers/clk/imx/clk-pllv2.c: In function '__clk_pllv2_set_rate':
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
drivers/clk/imx/clk-pllv2.c:145:2: note: in expansion of macro 'do_div'
do_div(temp64, quad_parent_rate / 1000000);
^
--
In file included from arch/arm/include/asm/div64.h:126:0,
from include/linux/kernel.h:136,
from drivers/clk/tegra/clk-divider.c:17:
drivers/clk/tegra/clk-divider.c: In function 'get_div':
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
>> drivers/clk/tegra/clk-divider.c:50:2: note: in expansion of macro 'do_div'
do_div(divider_ux1, rate);
^
--
In file included from arch/arm/include/asm/div64.h:126:0,
from include/linux/kernel.h:136,
from drivers/clk/ti/clkt_dpll.c:17:
drivers/clk/ti/clkt_dpll.c: In function 'omap2_get_dpll_rate':
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
>> drivers/clk/ti/clkt_dpll.c:266:2: note: in expansion of macro 'do_div'
do_div(dpll_clk, dpll_div + 1);
^
--
In file included from arch/arm/include/asm/div64.h:126:0,
from include/linux/kernel.h:136,
from include/linux/clk.h:16,
from drivers/clk/ti/fapll.c:12:
drivers/clk/ti/fapll.c: In function 'ti_fapll_recalc_rate':
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
>> drivers/clk/ti/fapll.c:182:3: note: in expansion of macro 'do_div'
do_div(rate, fapll_p);
^
drivers/clk/ti/fapll.c: In function 'ti_fapll_synth_recalc_rate':
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
drivers/clk/ti/fapll.c:346:3: note: in expansion of macro 'do_div'
do_div(rate, synth_div_freq);
^
--
In file included from arch/arm/include/asm/div64.h:126:0,
from include/linux/kernel.h:136,
from include/linux/list.h:8,
from include/linux/preempt.h:10,
from include/linux/spinlock.h:50,
from include/linux/mmzone.h:7,
from include/linux/gfp.h:5,
from include/linux/slab.h:14,
from drivers/gpu/drm/nouveau/include/nvif/os.h:5,
from drivers/gpu/drm/nouveau/include/nvkm/core/os.h:3,
from drivers/gpu/drm/nouveau/include/nvkm/core/event.h:3,
from drivers/gpu/drm/nouveau/include/nvkm/core/device.h:3,
from drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h:3,
from drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h:3,
from drivers/gpu/drm/nouveau/nvkm/subdev/clk/priv.h:4,
from drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c:26:
drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c: In function 'gk20a_pllg_calc_rate':
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c:144:2: note: in expansion of macro 'do_div'
do_div(rate, divider);
^
drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c:144:2: warning: right shift count >= width of type
In file included from include/linux/kernel.h:136:0,
from include/linux/list.h:8,
from include/linux/preempt.h:10,
from include/linux/spinlock.h:50,
from include/linux/mmzone.h:7,
from include/linux/gfp.h:5,
from include/linux/slab.h:14,
from drivers/gpu/drm/nouveau/include/nvif/os.h:5,
from drivers/gpu/drm/nouveau/include/nvkm/core/os.h:3,
from drivers/gpu/drm/nouveau/include/nvkm/core/event.h:3,
from drivers/gpu/drm/nouveau/include/nvkm/core/device.h:3,
from drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h:3,
from drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h:3,
from drivers/gpu/drm/nouveau/nvkm/subdev/clk/priv.h:4,
from drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c:26:
>> arch/arm/include/asm/div64.h:49:20: warning: passing argument 1 of '__div64_32' from incompatible pointer type
#define __div64_32 __div64_32
^
>> include/asm-generic/div64.h:235:11: note: in expansion of macro '__div64_32'
__rem = __div64_32(&(n), __base); \
^
drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c:144:2: note: in expansion of macro 'do_div'
do_div(rate, divider);
^
arch/arm/include/asm/div64.h:32:24: note: expected 'uint64_t *' but argument is of type 'u32 *'
static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
^
--
In file included from arch/arm/include/asm/div64.h:126:0,
from include/linux/kernel.h:136,
from include/linux/list.h:8,
from include/linux/preempt.h:10,
from include/linux/spinlock.h:50,
from include/linux/mmzone.h:7,
from include/linux/gfp.h:5,
from include/linux/slab.h:14,
from drivers/gpu/drm//nouveau/include/nvif/os.h:5,
from drivers/gpu/drm//nouveau/include/nvkm/core/os.h:3,
from drivers/gpu/drm//nouveau/include/nvkm/core/event.h:3,
from drivers/gpu/drm//nouveau/include/nvkm/core/device.h:3,
from drivers/gpu/drm//nouveau/include/nvkm/core/subdev.h:3,
from drivers/gpu/drm//nouveau/include/nvkm/subdev/clk.h:3,
from drivers/gpu/drm//nouveau/nvkm/subdev/clk/priv.h:4,
from drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c:26:
drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c: In function 'gk20a_pllg_calc_rate':
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c:144:2: note: in expansion of macro 'do_div'
do_div(rate, divider);
^
drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c:144:2: warning: right shift count >= width of type
In file included from include/linux/kernel.h:136:0,
from include/linux/list.h:8,
from include/linux/preempt.h:10,
from include/linux/spinlock.h:50,
from include/linux/mmzone.h:7,
from include/linux/gfp.h:5,
from include/linux/slab.h:14,
from drivers/gpu/drm//nouveau/include/nvif/os.h:5,
from drivers/gpu/drm//nouveau/include/nvkm/core/os.h:3,
from drivers/gpu/drm//nouveau/include/nvkm/core/event.h:3,
from drivers/gpu/drm//nouveau/include/nvkm/core/device.h:3,
from drivers/gpu/drm//nouveau/include/nvkm/core/subdev.h:3,
from drivers/gpu/drm//nouveau/include/nvkm/subdev/clk.h:3,
from drivers/gpu/drm//nouveau/nvkm/subdev/clk/priv.h:4,
from drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c:26:
>> arch/arm/include/asm/div64.h:49:20: warning: passing argument 1 of '__div64_32' from incompatible pointer type
#define __div64_32 __div64_32
^
>> include/asm-generic/div64.h:235:11: note: in expansion of macro '__div64_32'
__rem = __div64_32(&(n), __base); \
^
drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c:144:2: note: in expansion of macro 'do_div'
do_div(rate, divider);
^
arch/arm/include/asm/div64.h:32:24: note: expected 'uint64_t *' but argument is of type 'u32 *'
static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
^
vim +/do_div +99 drivers/clk/imx/clk-pllv1.c
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 @1 #include <linux/clk-provider.h>
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 2 #include <linux/io.h>
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 3 #include <linux/slab.h>
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 4 #include <linux/kernel.h>
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 5 #include <linux/err.h>
3a84d17b arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 6
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 7 #include "clk.h"
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 8
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 9 /**
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 10 * pll v1
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 11 *
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 12 * @clk_hw clock source
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 13 * @parent the parent clock name
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 14 * @base base address of pll registers
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 15 *
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 16 * PLL clock version 1, found on i.MX1/21/25/27/31/35
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 17 */
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 18
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 19 #define MFN_BITS (10)
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 20 #define MFN_SIGN (BIT(MFN_BITS - 1))
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 21 #define MFN_MASK (MFN_SIGN - 1)
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 22
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 23 struct clk_pllv1 {
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 24 struct clk_hw hw;
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 25 void __iomem *base;
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 26 enum imx_pllv1_type type;
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 27 };
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 28
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 29 #define to_clk_pllv1(clk) (container_of(clk, struct clk_pllv1, clk))
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 30
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 31 static inline bool is_imx1_pllv1(struct clk_pllv1 *pll)
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 32 {
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 33 return pll->type == IMX_PLLV1_IMX1;
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 34 }
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 35
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 36 static inline bool is_imx21_pllv1(struct clk_pllv1 *pll)
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 37 {
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 38 return pll->type == IMX_PLLV1_IMX21;
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 39 }
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 40
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 41 static inline bool is_imx27_pllv1(struct clk_pllv1 *pll)
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 42 {
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 43 return pll->type == IMX_PLLV1_IMX27;
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 44 }
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 45
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 46 static inline bool mfn_is_negative(struct clk_pllv1 *pll, unsigned int mfn)
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 47 {
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 48 return !is_imx1_pllv1(pll) && !is_imx21_pllv1(pll) && (mfn & MFN_SIGN);
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 49 }
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 50
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 51 static unsigned long clk_pllv1_recalc_rate(struct clk_hw *hw,
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 52 unsigned long parent_rate)
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 53 {
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 54 struct clk_pllv1 *pll = to_clk_pllv1(hw);
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 55 long long ll;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 56 int mfn_abs;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 57 unsigned int mfi, mfn, mfd, pd;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 58 u32 reg;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 59 unsigned long rate;
2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 60
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 61 reg = readl(pll->base);
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 62
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 63 /*
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 64 * Get the resulting clock rate from a PLL register value and the input
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 65 * frequency. PLLs with this register layout can be found on i.MX1,
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 66 * i.MX21, i.MX27 and i,MX31
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 67 *
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 68 * mfi + mfn / (mfd + 1)
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 69 * f = 2 * f_ref * --------------------
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 70 * pd + 1
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 71 */
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 72
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 73 mfi = (reg >> 10) & 0xf;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 74 mfn = reg & 0x3ff;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 75 mfd = (reg >> 16) & 0x3ff;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 76 pd = (reg >> 26) & 0xf;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 77
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 78 mfi = mfi <= 5 ? 5 : mfi;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 79
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 80 mfn_abs = mfn;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 81
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 82 /*
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 83 * On all i.MXs except i.MX1 and i.MX21 mfn is a 10bit
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 84 * 2's complements number.
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 85 * On i.MX27 the bit 9 is the sign bit.
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 86 */
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 87 if (mfn_is_negative(pll, mfn)) {
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 88 if (is_imx27_pllv1(pll))
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 89 mfn_abs = mfn & MFN_MASK;
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 90 else
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 91 mfn_abs = BIT(MFN_BITS) - mfn;
a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 92 }
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 93
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 94 rate = parent_rate * 2;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 95 rate /= pd + 1;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 96
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 97 ll = (unsigned long long)rate * mfn_abs;
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 98
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 @99 do_div(ll, mfd + 1);
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 100
3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 101 if (mfn_is_negative(pll, mfn))
a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 102 ll = -ll;
:::::: The code at line 99 was first introduced by commit
:::::: a6dd3c812e774b876d440c1a9ec1bd0fd5659390 ARM: i.MX clk pllv1: move mxc_decode_pll code to its user
:::::: TO: Sascha Hauer <s.hauer@pengutronix.de>
:::::: CC: Sascha Hauer <s.hauer@pengutronix.de>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 34547 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde
2015-11-03 1:25 ` kbuild test robot
@ 2015-11-03 4:03 ` Nicolas Pitre
0 siblings, 0 replies; 22+ messages in thread
From: Nicolas Pitre @ 2015-11-03 4:03 UTC (permalink / raw)
To: Michael Turquette, linux-clk, David Airlie, dri-devel
Cc: kbuild test robot, kbuild-all, Alexey Brodkin,
Måns Rullgård, Arnd Bergmann, rmk+kernel, linux-arch,
linux-kernel
[added Mike/linux-clk and David/dri-devel]
A patch I produced is now highlighting existing bugs in the drivers
listed below.
On Tue, 3 Nov 2015, kbuild test robot wrote:
> Hi Nicolas,
>
> [auto build test WARNING on asm-generic/master -- if it's inappropriate base, please suggest rules for selecting the more suitable base]
>
> url: https://github.com/0day-ci/linux/commits/Nicolas-Pitre/div64-h-optimize-do_div-for-power-of-two-constant-divisors/20151103-065348
> config: arm-multi_v7_defconfig (attached as .config)
> reproduce:
> wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> make.cross ARCH=arm
>
> All warnings (new ones prefixed by >>):
>
> In file included from arch/arm/include/asm/div64.h:126:0,
> from include/linux/kernel.h:136,
> from include/asm-generic/bug.h:13,
> from arch/arm/include/asm/bug.h:62,
> from include/linux/bug.h:4,
> from include/linux/io.h:23,
> from include/linux/clk-provider.h:14,
> from drivers/clk/imx/clk-pllv1.c:1:
> drivers/clk/imx/clk-pllv1.c: In function 'clk_pllv1_recalc_rate':
> include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
> (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> ^
> >> drivers/clk/imx/clk-pllv1.c:99:2: note: in expansion of macro 'do_div'
> do_div(ll, mfd + 1);
> ^
Here the problem is in clk-pllv1.c where the ll variable is declared as
a long long. It should be an unsigned long long, or better yet an
uint64_t or u64.
> --
> In file included from arch/arm/include/asm/div64.h:126:0,
> from include/linux/kernel.h:136,
> from drivers/clk/imx/clk-pllv2.c:1:
> drivers/clk/imx/clk-pllv2.c: In function '__clk_pllv2_recalc_rate':
> include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
> (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> ^
> >> drivers/clk/imx/clk-pllv2.c:103:2: note: in expansion of macro 'do_div'
> do_div(temp, mfd + 1);
> ^
Same thing: temp is declared as a s64. It should be u64.
> drivers/clk/imx/clk-pllv2.c: In function '__clk_pllv2_set_rate':
> include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
> (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> ^
> drivers/clk/imx/clk-pllv2.c:145:2: note: in expansion of macro 'do_div'
> do_div(temp64, quad_parent_rate / 1000000);
> ^
Ditto here.
> --
> In file included from arch/arm/include/asm/div64.h:126:0,
> from include/linux/kernel.h:136,
> from drivers/clk/tegra/clk-divider.c:17:
> drivers/clk/tegra/clk-divider.c: In function 'get_div':
> include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
> (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> ^
> >> drivers/clk/tegra/clk-divider.c:50:2: note: in expansion of macro 'do_div'
> do_div(divider_ux1, rate);
> ^
Ditto here.
> --
> In file included from arch/arm/include/asm/div64.h:126:0,
> from include/linux/kernel.h:136,
> from drivers/clk/ti/clkt_dpll.c:17:
> drivers/clk/ti/clkt_dpll.c: In function 'omap2_get_dpll_rate':
> include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
> (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> ^
> >> drivers/clk/ti/clkt_dpll.c:266:2: note: in expansion of macro 'do_div'
> do_div(dpll_clk, dpll_div + 1);
> ^
Ditto here.
> --
> In file included from arch/arm/include/asm/div64.h:126:0,
> from include/linux/kernel.h:136,
> from include/linux/clk.h:16,
> from drivers/clk/ti/fapll.c:12:
> drivers/clk/ti/fapll.c: In function 'ti_fapll_recalc_rate':
> include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
> (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> ^
> >> drivers/clk/ti/fapll.c:182:3: note: in expansion of macro 'do_div'
> do_div(rate, fapll_p);
> ^
Ditto here.
> drivers/clk/ti/fapll.c: In function 'ti_fapll_synth_recalc_rate':
> include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
> (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> ^
> drivers/clk/ti/fapll.c:346:3: note: in expansion of macro 'do_div'
> do_div(rate, synth_div_freq);
> ^
Ditto here.
> --
> In file included from arch/arm/include/asm/div64.h:126:0,
> from include/linux/kernel.h:136,
> from include/linux/list.h:8,
> from include/linux/preempt.h:10,
> from include/linux/spinlock.h:50,
> from include/linux/mmzone.h:7,
> from include/linux/gfp.h:5,
> from include/linux/slab.h:14,
> from drivers/gpu/drm/nouveau/include/nvif/os.h:5,
> from drivers/gpu/drm/nouveau/include/nvkm/core/os.h:3,
> from drivers/gpu/drm/nouveau/include/nvkm/core/event.h:3,
> from drivers/gpu/drm/nouveau/include/nvkm/core/device.h:3,
> from drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h:3,
> from drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h:3,
> from drivers/gpu/drm/nouveau/nvkm/subdev/clk/priv.h:4,
> from drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c:26:
> drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c: In function 'gk20a_pllg_calc_rate':
> include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
> (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> ^
> drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c:144:2: note: in expansion of macro 'do_div'
> do_div(rate, divider);
> ^
> drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c:144:2: warning: right shift count >= width of type
> In file included from include/linux/kernel.h:136:0,
> from include/linux/list.h:8,
> from include/linux/preempt.h:10,
> from include/linux/spinlock.h:50,
> from include/linux/mmzone.h:7,
> from include/linux/gfp.h:5,
> from include/linux/slab.h:14,
> from drivers/gpu/drm/nouveau/include/nvif/os.h:5,
> from drivers/gpu/drm/nouveau/include/nvkm/core/os.h:3,
> from drivers/gpu/drm/nouveau/include/nvkm/core/event.h:3,
> from drivers/gpu/drm/nouveau/include/nvkm/core/device.h:3,
> from drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h:3,
> from drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h:3,
> from drivers/gpu/drm/nouveau/nvkm/subdev/clk/priv.h:4,
> from drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c:26:
> >> arch/arm/include/asm/div64.h:49:20: warning: passing argument 1 of '__div64_32' from incompatible pointer type
> #define __div64_32 __div64_32
> ^
> >> include/asm-generic/div64.h:235:11: note: in expansion of macro '__div64_32'
> __rem = __div64_32(&(n), __base); \
> ^
> drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c:144:2: note: in expansion of macro 'do_div'
> do_div(rate, divider);
> ^
> arch/arm/include/asm/div64.h:32:24: note: expected 'uint64_t *' but argument is of type 'u32 *'
> static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
> ^
This one is a plain bug.
> --
> In file included from arch/arm/include/asm/div64.h:126:0,
> from include/linux/kernel.h:136,
> from include/linux/list.h:8,
> from include/linux/preempt.h:10,
> from include/linux/spinlock.h:50,
> from include/linux/mmzone.h:7,
> from include/linux/gfp.h:5,
> from include/linux/slab.h:14,
> from drivers/gpu/drm//nouveau/include/nvif/os.h:5,
> from drivers/gpu/drm//nouveau/include/nvkm/core/os.h:3,
> from drivers/gpu/drm//nouveau/include/nvkm/core/event.h:3,
> from drivers/gpu/drm//nouveau/include/nvkm/core/device.h:3,
> from drivers/gpu/drm//nouveau/include/nvkm/core/subdev.h:3,
> from drivers/gpu/drm//nouveau/include/nvkm/subdev/clk.h:3,
> from drivers/gpu/drm//nouveau/nvkm/subdev/clk/priv.h:4,
> from drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c:26:
> drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c: In function 'gk20a_pllg_calc_rate':
> include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
> (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
> ^
This one is a driver bug too.
> drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c:144:2: note: in expansion of macro 'do_div'
> do_div(rate, divider);
> ^
> drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c:144:2: warning: right shift count >= width of type
> In file included from include/linux/kernel.h:136:0,
> from include/linux/list.h:8,
> from include/linux/preempt.h:10,
> from include/linux/spinlock.h:50,
> from include/linux/mmzone.h:7,
> from include/linux/gfp.h:5,
> from include/linux/slab.h:14,
> from drivers/gpu/drm//nouveau/include/nvif/os.h:5,
> from drivers/gpu/drm//nouveau/include/nvkm/core/os.h:3,
> from drivers/gpu/drm//nouveau/include/nvkm/core/event.h:3,
> from drivers/gpu/drm//nouveau/include/nvkm/core/device.h:3,
> from drivers/gpu/drm//nouveau/include/nvkm/core/subdev.h:3,
> from drivers/gpu/drm//nouveau/include/nvkm/subdev/clk.h:3,
> from drivers/gpu/drm//nouveau/nvkm/subdev/clk/priv.h:4,
> from drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c:26:
> >> arch/arm/include/asm/div64.h:49:20: warning: passing argument 1 of '__div64_32' from incompatible pointer type
> #define __div64_32 __div64_32
> ^
> >> include/asm-generic/div64.h:235:11: note: in expansion of macro '__div64_32'
> __rem = __div64_32(&(n), __base); \
> ^
> drivers/gpu/drm//nouveau/nvkm/subdev/clk/gk20a.c:144:2: note: in expansion of macro 'do_div'
> do_div(rate, divider);
> ^
> arch/arm/include/asm/div64.h:32:24: note: expected 'uint64_t *' but argument is of type 'u32 *'
> static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
> ^
>
> vim +/do_div +99 drivers/clk/imx/clk-pllv1.c
>
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 @1 #include <linux/clk-provider.h>
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 2 #include <linux/io.h>
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 3 #include <linux/slab.h>
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 4 #include <linux/kernel.h>
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 5 #include <linux/err.h>
> 3a84d17b arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 6
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 7 #include "clk.h"
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 8
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 9 /**
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 10 * pll v1
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 11 *
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 12 * @clk_hw clock source
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 13 * @parent the parent clock name
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 14 * @base base address of pll registers
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 15 *
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 16 * PLL clock version 1, found on i.MX1/21/25/27/31/35
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 17 */
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 18
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 19 #define MFN_BITS (10)
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 20 #define MFN_SIGN (BIT(MFN_BITS - 1))
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 21 #define MFN_MASK (MFN_SIGN - 1)
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 22
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 23 struct clk_pllv1 {
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 24 struct clk_hw hw;
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 25 void __iomem *base;
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 26 enum imx_pllv1_type type;
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 27 };
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 28
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 29 #define to_clk_pllv1(clk) (container_of(clk, struct clk_pllv1, clk))
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 30
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 31 static inline bool is_imx1_pllv1(struct clk_pllv1 *pll)
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 32 {
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 33 return pll->type == IMX_PLLV1_IMX1;
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 34 }
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 35
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 36 static inline bool is_imx21_pllv1(struct clk_pllv1 *pll)
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 37 {
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 38 return pll->type == IMX_PLLV1_IMX21;
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 39 }
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 40
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 41 static inline bool is_imx27_pllv1(struct clk_pllv1 *pll)
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 42 {
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 43 return pll->type == IMX_PLLV1_IMX27;
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 44 }
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 45
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 46 static inline bool mfn_is_negative(struct clk_pllv1 *pll, unsigned int mfn)
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 47 {
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 48 return !is_imx1_pllv1(pll) && !is_imx21_pllv1(pll) && (mfn & MFN_SIGN);
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 49 }
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 50
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 51 static unsigned long clk_pllv1_recalc_rate(struct clk_hw *hw,
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 52 unsigned long parent_rate)
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 53 {
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 54 struct clk_pllv1 *pll = to_clk_pllv1(hw);
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 55 long long ll;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 56 int mfn_abs;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 57 unsigned int mfi, mfn, mfd, pd;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 58 u32 reg;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 59 unsigned long rate;
> 2af9e6db arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-03-09 60
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 61 reg = readl(pll->base);
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 62
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 63 /*
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 64 * Get the resulting clock rate from a PLL register value and the input
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 65 * frequency. PLLs with this register layout can be found on i.MX1,
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 66 * i.MX21, i.MX27 and i,MX31
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 67 *
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 68 * mfi + mfn / (mfd + 1)
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 69 * f = 2 * f_ref * --------------------
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 70 * pd + 1
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 71 */
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 72
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 73 mfi = (reg >> 10) & 0xf;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 74 mfn = reg & 0x3ff;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 75 mfd = (reg >> 16) & 0x3ff;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 76 pd = (reg >> 26) & 0xf;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 77
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 78 mfi = mfi <= 5 ? 5 : mfi;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 79
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 80 mfn_abs = mfn;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 81
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 82 /*
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 83 * On all i.MXs except i.MX1 and i.MX21 mfn is a 10bit
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 84 * 2's complements number.
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 85 * On i.MX27 the bit 9 is the sign bit.
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 86 */
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 87 if (mfn_is_negative(pll, mfn)) {
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 88 if (is_imx27_pllv1(pll))
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 89 mfn_abs = mfn & MFN_MASK;
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 90 else
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 91 mfn_abs = BIT(MFN_BITS) - mfn;
> a5947903 arch/arm/mach-imx/clk-pllv1.c Alexander Shiyan 2013-11-10 92 }
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 93
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 94 rate = parent_rate * 2;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 95 rate /= pd + 1;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 96
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 97 ll = (unsigned long long)rate * mfn_abs;
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 98
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 @99 do_div(ll, mfd + 1);
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 100
> 3bec5f81 arch/arm/mach-imx/clk-pllv1.c Shawn Guo 2015-04-26 101 if (mfn_is_negative(pll, mfn))
> a6dd3c81 arch/arm/mach-imx/clk-pllv1.c Sascha Hauer 2012-09-11 102 ll = -ll;
>
> :::::: The code at line 99 was first introduced by commit
> :::::: a6dd3c812e774b876d440c1a9ec1bd0fd5659390 ARM: i.MX clk pllv1: move mxc_decode_pll code to its user
>
> :::::: TO: Sascha Hauer <s.hauer@pengutronix.de>
> :::::: CC: Sascha Hauer <s.hauer@pengutronix.de>
>
> ---
> 0-DAY kernel test infrastructure Open Source Technology Center
> https://lists.01.org/pipermail/kbuild-all Intel Corporation
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde
2015-11-02 22:33 ` [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde Nicolas Pitre
2015-11-02 22:33 ` Nicolas Pitre
2015-11-03 1:25 ` kbuild test robot
@ 2015-11-03 21:39 ` kbuild test robot
2015-11-19 16:36 ` Måns Rullgård
3 siblings, 0 replies; 22+ messages in thread
From: kbuild test robot @ 2015-11-03 21:39 UTC (permalink / raw)
To: Nicolas Pitre
Cc: kbuild-all, Alexey Brodkin, Måns Rullgård,
Arnd Bergmann, rmk+kernel, linux-arch, linux-kernel
[-- Attachment #1: Type: text/plain, Size: 6111 bytes --]
Hi Nicolas,
[auto build test WARNING on asm-generic/master]
[also WARNING on: v4.3 next-20151103]
url: https://github.com/0day-ci/linux/commits/Nicolas-Pitre/div64-h-optimize-do_div-for-power-of-two-constant-divisors/20151103-065348
base: https://github.com/0day-ci/linux Nicolas-Pitre/div64-h-optimize-do_div-for-power-of-two-constant-divisors/20151103-065348
config: arm-allyesconfig (attached as .config)
reproduce:
wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=arm
All warnings (new ones prefixed by >>):
In file included from arch/arm/include/asm/div64.h:126:0,
from include/linux/kernel.h:136,
from drivers/cpufreq/s5pv210-cpufreq.c:13:
drivers/cpufreq/s5pv210-cpufreq.c: In function 's5pv210_set_refresh':
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
>> drivers/cpufreq/s5pv210-cpufreq.c:215:2: note: in expansion of macro 'do_div'
do_div(tmp, freq);
^
>> drivers/cpufreq/s5pv210-cpufreq.c:215:2: warning: right shift count >= width of type
In file included from include/linux/kernel.h:136:0,
from drivers/cpufreq/s5pv210-cpufreq.c:13:
arch/arm/include/asm/div64.h:49:20: warning: passing argument 1 of '__div64_32' from incompatible pointer type
#define __div64_32 __div64_32
^
include/asm-generic/div64.h:235:11: note: in expansion of macro '__div64_32'
__rem = __div64_32(&(n), __base); \
^
>> drivers/cpufreq/s5pv210-cpufreq.c:215:2: note: in expansion of macro 'do_div'
do_div(tmp, freq);
^
arch/arm/include/asm/div64.h:32:24: note: expected 'uint64_t *' but argument is of type 'long unsigned int *'
static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
^
In file included from arch/arm/include/asm/div64.h:126:0,
from include/linux/kernel.h:136,
from drivers/cpufreq/s5pv210-cpufreq.c:13:
include/asm-generic/div64.h:217:28: warning: comparison of distinct pointer types lacks a cast
(void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
^
drivers/cpufreq/s5pv210-cpufreq.c:219:2: note: in expansion of macro 'do_div'
do_div(tmp1, tmp);
^
drivers/cpufreq/s5pv210-cpufreq.c:219:2: warning: right shift count >= width of type
In file included from include/linux/kernel.h:136:0,
from drivers/cpufreq/s5pv210-cpufreq.c:13:
arch/arm/include/asm/div64.h:49:20: warning: passing argument 1 of '__div64_32' from incompatible pointer type
#define __div64_32 __div64_32
^
include/asm-generic/div64.h:235:11: note: in expansion of macro '__div64_32'
__rem = __div64_32(&(n), __base); \
^
drivers/cpufreq/s5pv210-cpufreq.c:219:2: note: in expansion of macro 'do_div'
do_div(tmp1, tmp);
^
arch/arm/include/asm/div64.h:32:24: note: expected 'uint64_t *' but argument is of type 'long unsigned int *'
static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
^
vim +/do_div +215 drivers/cpufreq/s5pv210-cpufreq.c
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 199 {
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 200 unsigned long tmp, tmp1;
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 201 void __iomem *reg = NULL;
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 202
d62fa311 arch/arm/mach-s5pv210/cpufreq.c Jonghwan Choi 2011-05-12 203 if (ch == DMC0) {
6d4ed0f4 drivers/cpufreq/s5pv210-cpufreq.c Tomasz Figa 2014-07-03 204 reg = (dmc_base[0] + 0x30);
d62fa311 arch/arm/mach-s5pv210/cpufreq.c Jonghwan Choi 2011-05-12 205 } else if (ch == DMC1) {
6d4ed0f4 drivers/cpufreq/s5pv210-cpufreq.c Tomasz Figa 2014-07-03 206 reg = (dmc_base[1] + 0x30);
d62fa311 arch/arm/mach-s5pv210/cpufreq.c Jonghwan Choi 2011-05-12 207 } else {
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 208 printk(KERN_ERR "Cannot find DMC port\n");
d62fa311 arch/arm/mach-s5pv210/cpufreq.c Jonghwan Choi 2011-05-12 209 return;
d62fa311 arch/arm/mach-s5pv210/cpufreq.c Jonghwan Choi 2011-05-12 210 }
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 211
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 212 /* Find current DRAM frequency */
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 213 tmp = s5pv210_dram_conf[ch].freq;
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 214
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 @215 do_div(tmp, freq);
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 216
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 217 tmp1 = s5pv210_dram_conf[ch].refresh;
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 218
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 219 do_div(tmp1, tmp);
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 220
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 221 __raw_writel(tmp1, reg);
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 222 }
83efc743 arch/arm/mach-s5pv210/cpufreq.c Jaecheol Lee 2010-10-12 223
:::::: The code at line 215 was first introduced by commit
:::::: 83efc7432f881d4f4bcedd6800710c2c4c24c58d ARM: S5PV210: Add support CPUFREQ
:::::: TO: Jaecheol Lee <jc.lee@samsung.com>
:::::: CC: Kukjin Kim <kgene.kim@samsung.com>
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 53076 bytes --]
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde
2015-11-02 22:33 ` [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde Nicolas Pitre
` (2 preceding siblings ...)
2015-11-03 21:39 ` kbuild test robot
@ 2015-11-19 16:36 ` Måns Rullgård
2015-11-19 16:42 ` Nicolas Pitre
3 siblings, 1 reply; 22+ messages in thread
From: Måns Rullgård @ 2015-11-19 16:36 UTC (permalink / raw)
To: Nicolas Pitre
Cc: Alexey Brodkin, Arnd Bergmann, rmk+kernel, linux-arch,
linux-kernel
Nicolas Pitre <nicolas.pitre@linaro.org> writes:
> +static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias)
> +{
> + unsigned long long res;
> + unsigned int tmp = 0;
> +
> + if (!bias) {
> + asm ( "umull %Q0, %R0, %Q1, %Q2\n\t"
> + "mov %Q0, #0"
> + : "=&r" (res)
> + : "r" (m), "r" (n)
> + : "cc");
> + } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
> + res = m;
> + asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t"
> + "mov %Q0, #0"
> + : "+&r" (res)
> + : "r" (m), "r" (n)
> + : "cc");
> + } else {
> + asm ( "umull %Q0, %R0, %Q2, %Q3\n\t"
> + "cmn %Q0, %Q2\n\t"
> + "adcs %R0, %R0, %R2\n\t"
> + "adc %Q0, %1, #0"
> + : "=&r" (res), "+&r" (tmp)
> + : "r" (m), "r" (n)
Why is tmp using a +r constraint here? The register is not written, so
using an input-only operand could/should result in better code. That is
also what the old code did.
> + : "cc");
> + }
> +
> + if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
> + asm ( "umlal %R0, %Q0, %R1, %Q2\n\t"
> + "umlal %R0, %Q0, %Q1, %R2\n\t"
> + "mov %R0, #0\n\t"
> + "umlal %Q0, %R0, %R1, %R2"
> + : "+&r" (res)
> + : "r" (m), "r" (n)
> + : "cc");
> + } else {
> + asm ( "umlal %R0, %Q0, %R2, %Q3\n\t"
> + "umlal %R0, %1, %Q2, %R3\n\t"
> + "mov %R0, #0\n\t"
> + "adds %Q0, %1, %Q0\n\t"
> + "adc %R0, %R0, #0\n\t"
> + "umlal %Q0, %R0, %R2, %R3"
> + : "+&r" (res), "+&r" (tmp)
> + : "r" (m), "r" (n)
> + : "cc");
> + }
> +
> + return res;
> +}
--
Måns Rullgård
mans@mansr.com
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde
2015-11-19 16:36 ` Måns Rullgård
@ 2015-11-19 16:42 ` Nicolas Pitre
2015-11-19 16:44 ` Måns Rullgård
0 siblings, 1 reply; 22+ messages in thread
From: Nicolas Pitre @ 2015-11-19 16:42 UTC (permalink / raw)
To: Måns Rullgård
Cc: Alexey Brodkin, Arnd Bergmann, rmk+kernel, linux-arch,
linux-kernel
[-- Attachment #1: Type: text/plain, Size: 2244 bytes --]
On Thu, 19 Nov 2015, Måns Rullgård wrote:
> Nicolas Pitre <nicolas.pitre@linaro.org> writes:
>
> > +static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias)
> > +{
> > + unsigned long long res;
> > + unsigned int tmp = 0;
> > +
> > + if (!bias) {
> > + asm ( "umull %Q0, %R0, %Q1, %Q2\n\t"
> > + "mov %Q0, #0"
> > + : "=&r" (res)
> > + : "r" (m), "r" (n)
> > + : "cc");
> > + } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
> > + res = m;
> > + asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t"
> > + "mov %Q0, #0"
> > + : "+&r" (res)
> > + : "r" (m), "r" (n)
> > + : "cc");
> > + } else {
> > + asm ( "umull %Q0, %R0, %Q2, %Q3\n\t"
> > + "cmn %Q0, %Q2\n\t"
> > + "adcs %R0, %R0, %R2\n\t"
> > + "adc %Q0, %1, #0"
> > + : "=&r" (res), "+&r" (tmp)
> > + : "r" (m), "r" (n)
>
> Why is tmp using a +r constraint here? The register is not written, so
> using an input-only operand could/should result in better code. That is
> also what the old code did.
No, it is worse. gcc allocates two registers because, somehow, it
doesn't think that the first one still holds zero after the first usage.
This way usage of only one temporary register is forced throughout,
producing better code.
I meant to have this split out in a separate patch but messed it up
somehow.
>
> > + : "cc");
> > + }
> > +
> > + if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
> > + asm ( "umlal %R0, %Q0, %R1, %Q2\n\t"
> > + "umlal %R0, %Q0, %Q1, %R2\n\t"
> > + "mov %R0, #0\n\t"
> > + "umlal %Q0, %R0, %R1, %R2"
> > + : "+&r" (res)
> > + : "r" (m), "r" (n)
> > + : "cc");
> > + } else {
> > + asm ( "umlal %R0, %Q0, %R2, %Q3\n\t"
> > + "umlal %R0, %1, %Q2, %R3\n\t"
> > + "mov %R0, #0\n\t"
> > + "adds %Q0, %1, %Q0\n\t"
> > + "adc %R0, %R0, #0\n\t"
> > + "umlal %Q0, %R0, %R2, %R3"
> > + : "+&r" (res), "+&r" (tmp)
> > + : "r" (m), "r" (n)
> > + : "cc");
> > + }
> > +
> > + return res;
> > +}
>
> --
> Måns Rullgård
> mans@mansr.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
>
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] ARM: asm/div64.h: adjust to generic codde
2015-11-19 16:42 ` Nicolas Pitre
@ 2015-11-19 16:44 ` Måns Rullgård
0 siblings, 0 replies; 22+ messages in thread
From: Måns Rullgård @ 2015-11-19 16:44 UTC (permalink / raw)
To: Nicolas Pitre
Cc: Alexey Brodkin, Arnd Bergmann, rmk+kernel, linux-arch,
linux-kernel
Nicolas Pitre <nicolas.pitre@linaro.org> writes:
> On Thu, 19 Nov 2015, Måns Rullgård wrote:
>
>> Nicolas Pitre <nicolas.pitre@linaro.org> writes:
>>
>> > +static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias)
>> > +{
>> > + unsigned long long res;
>> > + unsigned int tmp = 0;
>> > +
>> > + if (!bias) {
>> > + asm ( "umull %Q0, %R0, %Q1, %Q2\n\t"
>> > + "mov %Q0, #0"
>> > + : "=&r" (res)
>> > + : "r" (m), "r" (n)
>> > + : "cc");
>> > + } else if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
>> > + res = m;
>> > + asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t"
>> > + "mov %Q0, #0"
>> > + : "+&r" (res)
>> > + : "r" (m), "r" (n)
>> > + : "cc");
>> > + } else {
>> > + asm ( "umull %Q0, %R0, %Q2, %Q3\n\t"
>> > + "cmn %Q0, %Q2\n\t"
>> > + "adcs %R0, %R0, %R2\n\t"
>> > + "adc %Q0, %1, #0"
>> > + : "=&r" (res), "+&r" (tmp)
>> > + : "r" (m), "r" (n)
>>
>> Why is tmp using a +r constraint here? The register is not written, so
>> using an input-only operand could/should result in better code. That is
>> also what the old code did.
>
> No, it is worse. gcc allocates two registers because, somehow, it
> doesn't think that the first one still holds zero after the first usage.
> This way usage of only one temporary register is forced throughout,
> producing better code.
Makes sense. Thanks for explaining.
--
Måns Rullgård
mans@mansr.com
^ permalink raw reply [flat|nested] 22+ messages in thread