public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/3, v2] x86/xor: make virtualization friendly
@ 2012-11-02 14:21 Jan Beulich
  2012-11-02 17:30 ` H. Peter Anvin
  2013-01-25 10:43 ` [tip:x86/asm] x86/xor: Make " tip-bot for Jan Beulich
  0 siblings, 2 replies; 11+ messages in thread
From: Jan Beulich @ 2012-11-02 14:21 UTC (permalink / raw)
  To: mingo, tglx, hpa; +Cc: Konrad Rzeszutek Wilk, linux-kernel

In virtualized environments, the CR0.TS management needed here can be a
lot slower than anticipated by the original authors of this code. In
such cases, forcing the use of SSE- (or MMX-) based implementations is
therefore not desirable; actual measurements should always be done
instead.

For consistency, pull into the shared (32- and 64-bit) header not only
the inclusion of the generic code, but also that of the AVX variants.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

---
 arch/x86/include/asm/xor.h    |    8 +++++++-
 arch/x86/include/asm/xor_32.h |   22 ++++++++++------------
 arch/x86/include/asm/xor_64.h |   10 ++++++----
 3 files changed, 23 insertions(+), 17 deletions(-)

--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor.h
+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor.h
@@ -487,6 +487,12 @@ static struct xor_block_template xor_blo
 
 #undef XOR_CONSTANT_CONSTRAINT
 
+/* Also try the AVX routines */
+#include <asm/xor_avx.h>
+
+/* Also try the generic routines. */
+#include <asm-generic/xor.h>
+
 #ifdef CONFIG_X86_32
 # include <asm/xor_32.h>
 #else
@@ -494,6 +500,6 @@ static struct xor_block_template xor_blo
 #endif
 
 #define XOR_SELECT_TEMPLATE(FASTEST) \
-	AVX_SELECT(FASTEST)
+	(cpu_has_hypervisor ? (FASTEST) : AVX_SELECT(FASTEST))
 
 #endif /* _ASM_X86_XOR_H */
--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor_32.h
+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor_32.h
@@ -537,12 +537,6 @@ static struct xor_block_template xor_blo
 	.do_5 = xor_sse_5,
 };
 
-/* Also try the AVX routines */
-#include <asm/xor_avx.h>
-
-/* Also try the generic routines.  */
-#include <asm-generic/xor.h>
-
 /* We force the use of the SSE xor block because it can write around L2.
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched.  */
@@ -553,15 +547,19 @@ do {							\
 	if (cpu_has_xmm) {				\
 		xor_speed(&xor_block_pIII_sse);		\
 		xor_speed(&xor_block_sse_pf64);		\
-	} else if (cpu_has_mmx) {			\
+		if (!cpu_has_hypervisor)		\
+			break;				\
+	}						\
+	if (cpu_has_mmx) {				\
 		xor_speed(&xor_block_pII_mmx);		\
 		xor_speed(&xor_block_p5_mmx);		\
-	} else {					\
-		xor_speed(&xor_block_8regs);		\
-		xor_speed(&xor_block_8regs_p);		\
-		xor_speed(&xor_block_32regs);		\
-		xor_speed(&xor_block_32regs_p);		\
+		if (!cpu_has_hypervisor)		\
+			break;				\
 	}						\
+	xor_speed(&xor_block_8regs);			\
+	xor_speed(&xor_block_8regs_p);			\
+	xor_speed(&xor_block_32regs);			\
+	xor_speed(&xor_block_32regs_p);			\
 } while (0)
 
 #endif /* _ASM_X86_XOR_32_H */
--- 3.7-rc3-x86-xor.orig/arch/x86/include/asm/xor_64.h
+++ 3.7-rc3-x86-xor/arch/x86/include/asm/xor_64.h
@@ -9,10 +9,6 @@ static struct xor_block_template xor_blo
 	.do_5 = xor_sse_5,
 };
 
-
-/* Also try the AVX routines */
-#include <asm/xor_avx.h>
-
 /* We force the use of the SSE xor block because it can write around L2.
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched.  */
@@ -22,6 +18,12 @@ do {						\
 	AVX_XOR_SPEED;				\
 	xor_speed(&xor_block_sse_pf64);		\
 	xor_speed(&xor_block_sse);		\
+	if (cpu_has_hypervisor) {		\
+		xor_speed(&xor_block_8regs);	\
+		xor_speed(&xor_block_8regs_p);	\
+		xor_speed(&xor_block_32regs);	\
+		xor_speed(&xor_block_32regs_p);	\
+	}					\
 } while (0)
 
 #endif /* _ASM_X86_XOR_64_H */




^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2013-01-28 15:27 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2012-11-02 14:21 [PATCH 3/3, v2] x86/xor: make virtualization friendly Jan Beulich
2012-11-02 17:30 ` H. Peter Anvin
2012-11-05  9:10   ` Jan Beulich
2013-01-25 10:43 ` [tip:x86/asm] x86/xor: Make " tip-bot for Jan Beulich
2013-01-25 22:11   ` H. Peter Anvin
2013-01-25 22:15     ` H. Peter Anvin
2013-01-26  1:05       ` H. Peter Anvin
2013-01-26 16:49         ` KY Srinivasan
2013-01-26 12:10       ` Ingo Molnar
2013-01-28  9:04     ` Jan Beulich
2013-01-28 15:26       ` H. Peter Anvin

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.