public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86-64: memset optimization
@ 2007-08-17 23:34 Stephen Hemminger
  2007-08-18  7:17 ` Eric Dumazet
  2007-08-18  9:46 ` Andi Kleen
  0 siblings, 2 replies; 13+ messages in thread
From: Stephen Hemminger @ 2007-08-17 23:34 UTC (permalink / raw)
  To: Andi Kleen; +Cc: discuss, linux-kernel

Optimize uses of memset with small constant sizes.
This will generate smaller code, and avoid the slow rep/string instructions.
Code copied from i386 with a little cleanup.

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>

--- a/include/asm-x86_64/string.h	2007-08-17 15:14:32.000000000 -0700
+++ b/include/asm-x86_64/string.h	2007-08-17 15:36:30.000000000 -0700
@@ -42,9 +42,51 @@ extern void *__memcpy(void *to, const vo
 		 __ret = __builtin_memcpy((dst),(src),__len);	\
 	   __ret; }) 
 #endif
-
 #define __HAVE_ARCH_MEMSET
-void *memset(void *s, int c, size_t n);
+void *__memset(void *s, int c, size_t n);
+
+/* Open-coded memset for small counts; other counts fall back to __memset().
+ * unsigned long is 8 bytes on x86-64, so 4-byte stores use unsigned int.
+ * Compiler should optimize away all but the case used.
+ */
+static __always_inline void *
+__constant_c_and_count_memset(void *s, int c, size_t count)
+{
+	unsigned long pattern = 0x0101010101010101UL * (unsigned char) c;
+
+	switch (count) {
+	case 0:
+		return s;
+	case 1:
+		*(unsigned char *)s = pattern;
+		return s;
+	case 2:
+		*(unsigned short *)s = pattern;
+		return s;
+	case 3:
+		*(unsigned short *)s = pattern;
+		*(2+(unsigned char *)s) = pattern;
+		return s;
+	case 4:
+		*(unsigned int *)s = pattern;
+		return s;
+	case 6:
+		*(unsigned int *)s = pattern;
+		*(2+(unsigned short *)s) = pattern;
+		return s;
+	case 8:
+		*(unsigned long *)s = pattern;
+		return s;
+	default:
+		return __memset(s, c, count);
+	}
+}
+
+/* Inline only when both c and count are compile-time constants. */
+#define memset(s, c, count)					\
+	(__builtin_constant_p(c) && __builtin_constant_p(count)	\
+	 ? __constant_c_and_count_memset((s),(c),(count))	\
+	 : __memset((s),(c),(count)))
 
 #define __HAVE_ARCH_MEMMOVE
 void * memmove(void * dest,const void *src,size_t count);
--- a/arch/x86_64/kernel/x8664_ksyms.c	2007-08-17 15:14:32.000000000 -0700
+++ b/arch/x86_64/kernel/x8664_ksyms.c	2007-08-17 15:44:58.000000000 -0700
@@ -48,10 +48,12 @@ EXPORT_SYMBOL(__read_lock_failed);
 #undef memmove
 
 extern void * memset(void *,int,__kernel_size_t);
+extern void * __memset(void *,int,__kernel_size_t);
 extern void * memcpy(void *,const void *,__kernel_size_t);
 extern void * __memcpy(void *,const void *,__kernel_size_t);
 
 EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(__memset);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(__memcpy);
 

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2007-08-21 10:16 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-08-17 23:34 [PATCH] x86-64: memset optimization Stephen Hemminger
2007-08-18  7:17 ` Eric Dumazet
2007-08-18  9:46 ` Andi Kleen
2007-08-18 14:56   ` Stephen Hemminger
2007-08-18 18:55     ` Andi Kleen
2007-08-19  5:04       ` Stephen Hemminger
2007-08-19 18:24         ` [discuss] " Andi Kleen
2007-08-20 15:52           ` Stephen Hemminger
2007-08-20 15:51             ` Arjan van de Ven
2007-08-20 17:03               ` Roland Dreier
2007-08-20 18:16                 ` Andi Kleen
2007-08-20 18:56                 ` Jan Hubicka
2007-08-21 10:16                   ` Denys Vlasenko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox