All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Remove some divide instructions
@ 2004-10-27 16:14 Zachary Amsden
  2004-10-27 16:28 ` Linus Torvalds
  0 siblings, 1 reply; 13+ messages in thread
From: Zachary Amsden @ 2004-10-27 16:14 UTC (permalink / raw)
  To: linux-kernel, Linus Torvalds, george

[-- Attachment #1: Type: text/plain, Size: 560 bytes --]

I noticed several 64-bit divides for HZ/USER_HZ, and also the fact that
HZ == USER_HZ on many architectures (or if you play with scaling it ;).
Since do_div is implemented as a macro around optimized assembler on many
platforms, we emit long divides even for a divide by one.  This patch
short-circuits do_div when the base is a compile-time constant equal to
one.  This could be extended further to recognize other power-of-two
divisors, but I don't think the added complexity in the macros would be
justified.  I also didn't feel it was worthwhile to optimize this for
non-constant divisors; if you feel otherwise, please extend.

Cheers,

Zachary Amsden
zach@vmware.com

[-- Attachment #2: div64.patch --]
[-- Type: text/plain, Size: 6213 bytes --]

diff -ru linux-2.6.10-rc1-nsz/include/asm-arm/div64.h linux-2.6.10-rc1/include/asm-arm/div64.h
--- linux-2.6.10-rc1-nsz/include/asm-arm/div64.h	2004-10-25 10:53:12.000000000 -0700
+++ linux-2.6.10-rc1/include/asm-arm/div64.h	2004-10-27 08:29:36.000000000 -0700
@@ -28,6 +28,7 @@
 #endif
 
 #define do_div(n,base)						\
+((__builtin_constant_p(base) && ((base) == 1)) ? 0 :		\
 ({								\
 	register unsigned int __base      asm("r4") = base;	\
 	register unsigned long long __n   asm("r0") = n;	\
@@ -43,6 +44,6 @@
 		: "ip", "lr", "cc");				\
 	n = __res;						\
 	__rem;							\
-})
+}))
 
 #endif
diff -ru linux-2.6.10-rc1-nsz/include/asm-generic/div64.h linux-2.6.10-rc1/include/asm-generic/div64.h
--- linux-2.6.10-rc1-nsz/include/asm-generic/div64.h	2003-12-17 18:59:30.000000000 -0800
+++ linux-2.6.10-rc1/include/asm-generic/div64.h	2004-10-27 08:26:01.000000000 -0700
@@ -22,13 +22,14 @@
 
 #if BITS_PER_LONG == 64
 
-# define do_div(n,base) ({					\
-	uint32_t __base = (base);				\
+# define do_div(n,base) (					\
+	(__builtin_constant_p(base) && ((base) == 1)) ? 0 : ({	\
 	uint32_t __rem;						\
+	uint32_t __base = (base);				\
 	__rem = ((uint64_t)(n)) % __base;			\
 	(n) = ((uint64_t)(n)) / __base;				\
 	__rem;							\
- })
+ }))
 
 #elif BITS_PER_LONG == 32
 
@@ -37,17 +38,18 @@
 /* The unnecessary pointer compare is there
  * to check for type safety (n must be 64bit)
  */
-# define do_div(n,base) ({				\
-	uint32_t __base = (base);			\
-	uint32_t __rem;					\
-	(void)(((typeof((n)) *)0) == ((uint64_t *)0));	\
-	if (likely(((n) >> 32) == 0)) {			\
-		__rem = (uint32_t)(n) % __base;		\
-		(n) = (uint32_t)(n) / __base;		\
-	} else 						\
-		__rem = __div64_32(&(n), __base);	\
-	__rem;						\
- })
+# define do_div(n,base) (					\
+	(__builtin_constant_p(base) && ((base) == 1)) ? 0 : ({	\
+	uint32_t __base = (base);				\
+	uint32_t __rem;						\
+	(void)(((typeof((n)) *)0) == ((uint64_t *)0));		\
+	if (likely(((n) >> 32) == 0)) {				\
+		__rem = (uint32_t)(n) % __base;			\
+		(n) = (uint32_t)(n) / __base;			\
+	} else 							\
+		__rem = __div64_32(&(n), __base);		\
+	__rem;							\
+ }))
 
 #else /* BITS_PER_LONG == ?? */
 
diff -ru linux-2.6.10-rc1-nsz/include/asm-i386/div64.h linux-2.6.10-rc1/include/asm-i386/div64.h
--- linux-2.6.10-rc1-nsz/include/asm-i386/div64.h	2003-12-17 18:57:59.000000000 -0800
+++ linux-2.6.10-rc1/include/asm-i386/div64.h	2004-10-27 08:31:45.000000000 -0700
@@ -13,7 +13,8 @@
  * This ends up being the most efficient "calling
  * convention" on x86.
  */
-#define do_div(n,base) ({ \
+#define do_div(n,base) ( \
+	(__builtin_constant_p(base) && ((base) == 1)) ? 0 : ({ \
 	unsigned long __upper, __low, __high, __mod, __base; \
 	__base = (base); \
 	asm("":"=a" (__low), "=d" (__high):"A" (n)); \
@@ -25,7 +26,7 @@
 	asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
 	asm("":"=A" (n):"a" (__low),"d" (__high)); \
 	__mod; \
-})
+}))
 
 /*
  * (long)X = ((long long)divs) / (long)div
diff -ru linux-2.6.10-rc1-nsz/include/asm-m32r/div64.h linux-2.6.10-rc1/include/asm-m32r/div64.h
--- linux-2.6.10-rc1-nsz/include/asm-m32r/div64.h	2004-10-25 11:15:58.000000000 -0700
+++ linux-2.6.10-rc1/include/asm-m32r/div64.h	2004-10-27 08:21:53.000000000 -0700
@@ -12,6 +12,7 @@
  *  return value = n % base;
  */
 #define do_div(n, base)						\
+((__builtin_constant_p(base) && ((base) == 1)) ? 0 : 		\
 ({								\
 	unsigned long _res, _high, _mid, _low;			\
 								\
@@ -33,6 +34,6 @@
 		n = (_low / (unsigned long)(base));		\
 	}							\
 	_res;							\
-})
+}))
 
 #endif  /* _ASM_M32R_DIV64 */
diff -ru linux-2.6.10-rc1-nsz/include/asm-m68k/div64.h linux-2.6.10-rc1/include/asm-m68k/div64.h
--- linux-2.6.10-rc1-nsz/include/asm-m68k/div64.h	2003-12-17 18:59:37.000000000 -0800
+++ linux-2.6.10-rc1/include/asm-m68k/div64.h	2004-10-27 08:30:55.000000000 -0700
@@ -3,7 +3,8 @@
 
 /* n = n / base; return rem; */
 
-#define do_div(n, base) ({					\
+#define do_div(n, base) (					\
+(__builtin_constant_p(base) && ((base) == 1)) ? 0 : ({		\
 	union {							\
 		unsigned long n32[2];				\
 		unsigned long long n64;				\
@@ -21,6 +22,6 @@
 		: "d" (base), "1" (__upper), "0" (__n.n32[1]));	\
 	(n) = __n.n64;						\
 	__rem;							\
-})
+}))
 
 #endif /* _M68K_DIV64_H */
diff -ru linux-2.6.10-rc1-nsz/include/asm-mips/div64.h linux-2.6.10-rc1/include/asm-mips/div64.h
--- linux-2.6.10-rc1-nsz/include/asm-mips/div64.h	2003-12-17 18:59:06.000000000 -0800
+++ linux-2.6.10-rc1/include/asm-mips/div64.h	2004-10-27 08:25:29.000000000 -0700
@@ -51,7 +51,8 @@
 	(res) = __quot; \
 	__mod; })
 
-#define do_div(n, base) ({ \
+#define do_div(n, base) ( \
+	(__builtin_constant_p(base) && ((base) == 1)) ? 0 : ({ \
 	unsigned long long __quot; \
 	unsigned long __mod; \
 	unsigned long long __div; \
@@ -74,7 +75,7 @@
 	__quot = __high; \
 	__quot = __quot << 32 | __low; \
 	(n) = __quot; \
-	__mod; })
+	__mod; }))
 #endif /* (_MIPS_SZLONG == 32) */
 
 #if (_MIPS_SZLONG == 64)
@@ -104,7 +105,8 @@
  * Hey, we're already 64-bit, no
  * need to play games..
  */
-#define do_div(n, base) ({ \
+#define do_div(n, base) ( \
+	(__builtin_constant_p(base) && ((base) == 1)) ? 0 : ({ \
 	unsigned long __quot; \
 	unsigned int __mod; \
 	unsigned long __div; \
@@ -117,7 +119,7 @@
 	__quot = __div / __base; \
 	\
 	(n) = __quot; \
-	__mod; })
+	__mod; }))
 
 #endif /* (_MIPS_SZLONG == 64) */
 
diff -ru linux-2.6.10-rc1-nsz/include/asm-s390/div64.h linux-2.6.10-rc1/include/asm-s390/div64.h
--- linux-2.6.10-rc1-nsz/include/asm-s390/div64.h	2004-10-25 10:50:43.000000000 -0700
+++ linux-2.6.10-rc1/include/asm-s390/div64.h	2004-10-27 08:27:34.000000000 -0700
@@ -4,7 +4,8 @@
 #ifndef __s390x__
 
 /* for do_div "base" needs to be smaller than 2^31-1 */
-#define do_div(n, base) ({                                      \
+#define do_div(n, base) (					\
+	(__builtin_constant_p(base) && ((base) == 1)) ? 0 : ({	\
 	unsigned long long __n = (n);				\
 	unsigned long __r;					\
 								\
@@ -40,7 +41,7 @@
 	     : "d" (base), "m" (__n) : "0", "1", "2", "cc" );	\
 	(n) = (__n);						\
         __r;                                                    \
-})
+}))
 
 #else /* __s390x__ */
 #include <asm-generic/div64.h>

[-- Attachment #3: README.div64 --]
[-- Type: text/plain, Size: 484 bytes --]


div64.patch :

Get rid of 64-bit divides by a constant one.  This appears to be a common case
when HZ == USER_HZ.  I tested the patch on older 2.6 kernels and was able to
produce some harmless warnings (statement has no effect), but it builds cleanly
for i386 with a 2.6.10 kernel.  I tested the generic asm inline by extracting it
and compiling it with gcc, but I have not tested any other platforms.

It is doubtful, but if this breaks your build on some other platform, please send me mail.

Zach Amsden
zach@vmware.com

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2004-10-29 19:43 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-10-27 16:14 [PATCH] Remove some divide instructions Zachary Amsden
2004-10-27 16:28 ` Linus Torvalds
2004-10-27 18:05   ` Zachary Amsden
2004-10-27 20:16   ` Zachary Amsden
2004-10-27 21:24     ` Linus Torvalds
2004-10-27 22:08   ` Thayne Harbaugh
2004-10-27 22:14   ` Zachary Amsden
2004-10-28  0:11     ` Linus Torvalds
2004-10-28  0:47       ` Linus Torvalds
2004-10-29  0:47         ` Zachary Amsden
2004-10-29  4:52           ` Linus Torvalds
2004-10-29 19:10             ` Geert Uytterhoeven
2004-10-28  0:59       ` Maciej W. Rozycki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.