All of lore.kernel.org
 help / color / mirror / Atom feed
From: Alexey Dobriyan <adobriyan@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, tglx@linutronix.de, mingo@redhat.com,
	hpa@zytor.com, Alexey Dobriyan <adobriyan@gmail.com>
Subject: [PATCH 5/5] -march=native: MOVBE support
Date: Fri,  8 Dec 2017 01:41:54 +0300	[thread overview]
Message-ID: <20171207224154.4687-5-adobriyan@gmail.com> (raw)
In-Reply-To: <20171207224154.4687-1-adobriyan@gmail.com>

Use MOVBE if it is available.

This doesn't save code size, as MOVBE seems to be as long as MOV+BSWAP.
It is not clear whether it saves a uop; maybe it will in the future.

Do it because it is easy, I guess.
---
 arch/x86/crypto/des3_ede-asm_64.S | 28 ++++++++++++++++++++++++++++
 arch/x86/net/bpf_jit.S            | 12 ++++++++++++
 scripts/kconfig/cpuid.c           |  4 ++++
 scripts/march-native.sh           |  3 ++-
 4 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index 8e49ce117494..007319ea1f62 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -159,6 +159,15 @@
 
 #define dummy2(a, b) /*_*/
 
+#ifdef CONFIG_MARCH_NATIVE_MOVBE
+#define read_block(io, left, right) \
+	movbe	 (io), left##d; \
+	movbe	4(io), right##d;
+
+#define write_block(io, left, right) \
+	movbe	left##d,   (io); \
+	movbe	right##d, 4(io);
+#else
 #define read_block(io, left, right) \
 	movl    (io), left##d; \
 	movl   4(io), right##d; \
@@ -170,6 +179,7 @@
 	bswapl right##d; \
 	movl   left##d,   (io); \
 	movl   right##d, 4(io);
+#endif
 
 ENTRY(des3_ede_x86_64_crypt_blk)
 	/* input:
@@ -443,6 +453,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	pushq %rsi /* dst */
 
 	/* load input */
+#ifdef CONFIG_MARCH_NATIVE_MOVBE
+	movbe 0 * 4(%rdx), RL0d;
+	movbe 1 * 4(%rdx), RR0d;
+	movbe 2 * 4(%rdx), RL1d;
+	movbe 3 * 4(%rdx), RR1d;
+	movbe 4 * 4(%rdx), RL2d;
+	movbe 5 * 4(%rdx), RR2d;
+#else
 	movl 0 * 4(%rdx), RL0d;
 	movl 1 * 4(%rdx), RR0d;
 	movl 2 * 4(%rdx), RL1d;
@@ -456,6 +474,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	bswapl RR1d;
 	bswapl RL2d;
 	bswapl RR2d;
+#endif
 
 	initial_permutation3(RL, RR);
 
@@ -516,6 +535,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 
 	final_permutation3(RR, RL);
 
+#ifdef CONFIG_MARCH_NATIVE_MOVBE
+	movbe RR0d, 0 * 4(%rsi);
+	movbe RL0d, 1 * 4(%rsi);
+	movbe RR1d, 2 * 4(%rsi);
+	movbe RL1d, 3 * 4(%rsi);
+	movbe RR2d, 4 * 4(%rsi);
+	movbe RL2d, 5 * 4(%rsi);
+#else
 	bswapl RR0d;
 	bswapl RL0d;
 	bswapl RR1d;
@@ -530,6 +557,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
 	movl RL1d, 3 * 4(%rsi);
 	movl RR2d, 4 * 4(%rsi);
 	movl RL2d, 5 * 4(%rsi);
+#endif
 
 	popq %r15;
 	popq %r14;
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index b33093f84528..17fe33750298 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -34,8 +34,12 @@ FUNC(sk_load_word_positive_offset)
 	sub	%esi,%eax		# hlen - offset
 	cmp	$3,%eax
 	jle	bpf_slow_path_word
+#ifdef CONFIG_MARCH_NATIVE_MOVBE
+	movbe	(SKBDATA,%rsi),%eax
+#else
 	mov     (SKBDATA,%rsi),%eax
 	bswap   %eax  			/* ntohl() */
+#endif
 	ret
 
 FUNC(sk_load_half)
@@ -80,8 +84,12 @@ FUNC(sk_load_byte_positive_offset)
 bpf_slow_path_word:
 	bpf_slow_path_common(4)
 	js	bpf_error
+#ifdef CONFIG_MARCH_NATIVE_MOVBE
+	movbe	32(%rbp),%eax
+#else
 	mov	32(%rbp),%eax
 	bswap	%eax
+#endif
 	ret
 
 bpf_slow_path_half:
@@ -118,8 +126,12 @@ bpf_slow_path_word_neg:
 
 FUNC(sk_load_word_negative_offset)
 	sk_negative_common(4)
+#ifdef CONFIG_MARCH_NATIVE_MOVBE
+	movbe	(%rax), %eax
+#else
 	mov	(%rax), %eax
 	bswap	%eax
+#endif
 	ret
 
 bpf_slow_path_half_neg:
diff --git a/scripts/kconfig/cpuid.c b/scripts/kconfig/cpuid.c
index ecb285183581..2c23c8699ae6 100644
--- a/scripts/kconfig/cpuid.c
+++ b/scripts/kconfig/cpuid.c
@@ -42,6 +42,7 @@ static inline void cpuid2(uint32_t eax0, uint32_t ecx0, uint32_t *eax, uint32_t
 	);
 }
 
+static bool movbe	= false;
 static bool popcnt	= false;
 static bool rep_movsb	= false;
 static bool rep_stosb	= false;
@@ -56,6 +57,8 @@ static void intel(void)
 		cpuid(1, &eax, &ecx, &edx, &ebx);
 //		printf("%08x %08x %08x %08x\n", eax, ecx, edx, ebx);
 
+		if (ecx & (1 << 22))
+			movbe = true;
 		if (ecx & (1 << 23))
 			popcnt = true;
 	}
@@ -86,6 +89,7 @@ int main(int argc, char *argv[])
 		intel();
 
 #define _(x)	if (streq(opt, #x)) return x ? EXIT_SUCCESS : EXIT_FAILURE
+	_(movbe);
 	_(popcnt);
 	_(rep_movsb);
 	_(rep_stosb);
diff --git a/scripts/march-native.sh b/scripts/march-native.sh
index d3adf0edb2be..93f6a9bd4a6c 100755
--- a/scripts/march-native.sh
+++ b/scripts/march-native.sh
@@ -30,6 +30,7 @@ option() {
 }
 
 if test -x "$CPUID"; then
+	"$CPUID" movbe		&& option "CONFIG_MARCH_NATIVE_MOVBE"
 	"$CPUID" popcnt		&& option "CONFIG_MARCH_NATIVE_POPCNT"
 	"$CPUID" rep_movsb	&& option "CONFIG_MARCH_NATIVE_REP_MOVSB"
 	"$CPUID" rep_stosb	&& option "CONFIG_MARCH_NATIVE_REP_STOSB"
@@ -76,7 +77,7 @@ for i in $COLLECT_GCC_OPTIONS; do
 		-mhle)		option "CONFIG_MARCH_NATIVE_HLE"	;;
 		-mlzcnt)	option "CONFIG_MARCH_NATIVE_LZCNT"	;;
 		-mmmx)		option "CONFIG_MARCH_NATIVE_MMX"	;;
-		-mmovbe)	option "CONFIG_MARCH_NATIVE_MOVBE"	;;
+		-mmovbe);;
 		-mpclmul)	option "CONFIG_MARCH_NATIVE_PCLMUL"	;;
 		-mpopcnt);;
 		-mprfchw)	option "CONFIG_MARCH_NATIVE_PREFETCHW"	;;
-- 
2.13.6

  parent reply	other threads:[~2017-12-07 22:42 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-07 22:41 [PATCH v0 1/5] x86_64: march=native support Alexey Dobriyan
2017-12-07 22:41 ` [PATCH 2/5] -march=native: POPCNT support Alexey Dobriyan
2017-12-07 23:07   ` H. Peter Anvin
2017-12-08 10:09     ` Alexey Dobriyan
2017-12-07 22:41 ` [PATCH 3/5] -march=native: REP MOVSB support Alexey Dobriyan
2017-12-07 22:41 ` [PATCH 4/5] -march=native: REP STOSB Alexey Dobriyan
2017-12-08 19:08   ` Andi Kleen
2017-12-07 22:41 ` Alexey Dobriyan [this message]
2017-12-07 23:32 ` [PATCH v0 1/5] x86_64: march=native support H. Peter Anvin
2017-12-08  9:57   ` Alexey Dobriyan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171207224154.4687-5-adobriyan@gmail.com \
    --to=adobriyan@gmail.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.