All of lore.kernel.org
 help / color / mirror / Atom feed
From: Catalin Marinas <catalin.marinas@arm.com>
To: Ard Biesheuvel <ardb@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org, will@kernel.org,
	Mark Rutland <mark.rutland@arm.com>,
	Peter Zijlstra <peterz@infradead.org>
Subject: Re: [PATCH 2/2] arm64/xor: use EOR3 instructions when available
Date: Mon, 13 Dec 2021 13:24:55 +0000	[thread overview]
Message-ID: <YbdJp1EhAt/UkGjB@arm.com> (raw)
In-Reply-To: <20211109120336.3561463-3-ardb@kernel.org>

Hi Ard,

I trust you on the algorithm but some minor issues below.

On Tue, Nov 09, 2021 at 01:03:36PM +0100, Ard Biesheuvel wrote:
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 6f2d3e31fb54..14354acba5b4 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -2034,6 +2034,9 @@ config SYSVIPC_COMPAT
>  	def_bool y
>  	depends on COMPAT && SYSVIPC
>  
> +config CC_HAVE_SHA3
> +	def_bool $(cc-option, -march=armv8.2-a+sha3)

Is it the compiler or the assembler that we need to support this? I
think it's sufficient to only check the latter.

I'd also move it to the ARMv8.2 section.

> +
>  menu "Power management options"
>  
>  source "kernel/power/Kconfig"
> diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
> index ee4795f3e166..0415cb94c781 100644
> --- a/arch/arm64/lib/xor-neon.c
> +++ b/arch/arm64/lib/xor-neon.c
> @@ -172,6 +172,135 @@ void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1,
>  }
>  EXPORT_SYMBOL(xor_arm64_neon_5);
>  
> +static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
> +{
> +	uint64x2_t res;
> +
> +	asm(".arch	armv8.2-a+sha3		\n"
> +	    "eor3 %0.16b, %1.16b, %2.16b, %3.16b"
> +	    : "=w"(res) : "w"(p), "w"(q), "w"(r));
> +	return res;
> +}

The .arch here may confuse the compiler/assembler since it overrides any
other .arch. I think this diff on top would do but I haven't extensively
tested it. I can fold it in if you give it a try:

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5adae54c98d8..c5104e8829e5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1545,6 +1545,12 @@ endmenu
 
 menu "ARMv8.2 architectural features"
 
+config AS_HAS_ARMV8_2
+	def_bool $(cc-option,-Wa$(comma)-march=armv8.2-a)
+
+config AS_HAS_SHA3
+	def_bool $(as-instr,.arch armv8.2-a+sha3)
+
 config ARM64_PMEM
 	bool "Enable support for persistent memory"
 	select ARCH_HAS_PMEM_API
@@ -2032,9 +2038,6 @@ config SYSVIPC_COMPAT
 	def_bool y
 	depends on COMPAT && SYSVIPC
 
-config CC_HAVE_SHA3
-	def_bool $(cc-option, -march=armv8.2-a+sha3)
-
 menu "Power management options"
 
 source "kernel/power/Kconfig"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index e8cfc5868aa8..2f1de88651e6 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -58,6 +58,11 @@ stack_protector_prepare: prepare0
 					include/generated/asm-offsets.h))
 endif
 
+ifeq ($(CONFIG_AS_HAS_ARMV8_2), y)
+# make sure to pass the newest target architecture to -march.
+asm-arch := armv8.2-a
+endif
+
 # Ensure that if the compiler supports branch protection we default it
 # off, this will be overridden if we are using branch protection.
 branch-prot-flags-y += $(call cc-option,-mbranch-protection=none)
diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
index 0415cb94c781..2ca823825363 100644
--- a/arch/arm64/lib/xor-neon.c
+++ b/arch/arm64/lib/xor-neon.c
@@ -176,7 +176,7 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
 {
 	uint64x2_t res;
 
-	asm(".arch	armv8.2-a+sha3		\n"
+	asm(ARM64_ASM_PREAMBLE ".arch_extension sha3\n"
 	    "eor3 %0.16b, %1.16b, %2.16b, %3.16b"
 	    : "=w"(res) : "w"(p), "w"(q), "w"(r));
 	return res;
@@ -311,7 +311,7 @@ EXPORT_STATIC_CALL(xor_arm64_5);
 
 static int __init xor_neon_init(void)
 {
-	if (IS_ENABLED(CONFIG_CC_HAVE_SHA3) && cpu_have_named_feature(SHA3)) {
+	if (IS_ENABLED(CONFIG_AS_HAS_SHA3) && cpu_have_named_feature(SHA3)) {
 		static_call_update(xor_arm64_3, xor_arm64_eor3_3);
 		static_call_update(xor_arm64_4, xor_arm64_eor3_4);
 		static_call_update(xor_arm64_5, xor_arm64_eor3_5);

-- 
Catalin

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  reply	other threads:[~2021-12-13 13:26 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-11-09 12:03 [PATCH 0/2] arm64: use SHA3 instructions to speed up XOR Ard Biesheuvel
2021-11-09 12:03 ` [PATCH 1/2] arm64/xor: use static calls for inner NEON helpers Ard Biesheuvel
2021-11-09 12:03 ` [PATCH 2/2] arm64/xor: use EOR3 instructions when available Ard Biesheuvel
2021-12-13 13:24   ` Catalin Marinas [this message]
2021-12-13 13:33     ` Ard Biesheuvel
2021-12-13 15:05       ` Catalin Marinas
2021-12-13 15:10         ` Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=YbdJp1EhAt/UkGjB@arm.com \
    --to=catalin.marinas@arm.com \
    --cc=ardb@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=mark.rutland@arm.com \
    --cc=peterz@infradead.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.