From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: herbert@gondor.apana.org.au,
linux-arm-kernel@lists.infradead.org, will@kernel.org,
catalin.marinas@arm.com, mark.rutland@arm.com,
Ard Biesheuvel <ardb@kernel.org>,
Dave Martin <dave.martin@arm.com>,
Eric Biggers <ebiggers@google.com>
Subject: [PATCH 5/9] crypto: arm64/sha512-ce - simplify NEON yield
Date: Thu, 28 Jan 2021 14:06:21 +0100 [thread overview]
Message-ID: <20210128130625.54076-6-ardb@kernel.org> (raw)
In-Reply-To: <20210128130625.54076-1-ardb@kernel.org>
Instead of calling into kernel_neon_end() and kernel_neon_begin() (and
potentially into schedule()) from the assembler code when running in
task mode and a reschedule is pending, perform only the preempt count
check in assembler, but simply return early in this case, and let the C
code deal with the consequences.
This reverts commit 6caf7adc5e458f77f550b6c6ca8effa152d61b4a.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/crypto/sha512-ce-core.S | 29 +++--------
arch/arm64/crypto/sha512-ce-glue.c | 53 ++++++++++----------
2 files changed, 34 insertions(+), 48 deletions(-)
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S
index cde606c0323e..d6e7f6c95fa6 100644
--- a/arch/arm64/crypto/sha512-ce-core.S
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -107,23 +107,17 @@
*/
.text
SYM_FUNC_START(sha512_ce_transform)
- frame_push 3
-
- mov x19, x0
- mov x20, x1
- mov x21, x2
-
/* load state */
-0: ld1 {v8.2d-v11.2d}, [x19]
+ ld1 {v8.2d-v11.2d}, [x0]
/* load first 4 round constants */
adr_l x3, .Lsha512_rcon
ld1 {v20.2d-v23.2d}, [x3], #64
/* load input */
-1: ld1 {v12.2d-v15.2d}, [x20], #64
- ld1 {v16.2d-v19.2d}, [x20], #64
- sub w21, w21, #1
+0: ld1 {v12.2d-v15.2d}, [x1], #64
+ ld1 {v16.2d-v19.2d}, [x1], #64
+ sub w2, w2, #1
CPU_LE( rev64 v12.16b, v12.16b )
CPU_LE( rev64 v13.16b, v13.16b )
@@ -201,19 +195,12 @@ CPU_LE( rev64 v19.16b, v19.16b )
add v10.2d, v10.2d, v2.2d
add v11.2d, v11.2d, v3.2d
+ cond_yield 3f, x4
/* handled all input blocks? */
- cbz w21, 3f
-
- if_will_cond_yield_neon
- st1 {v8.2d-v11.2d}, [x19]
- do_cond_yield_neon
- b 0b
- endif_yield_neon
-
- b 1b
+ cbnz w2, 0b
/* store new state */
-3: st1 {v8.2d-v11.2d}, [x19]
- frame_pop
+3: st1 {v8.2d-v11.2d}, [x0]
+ mov w0, w2
ret
SYM_FUNC_END(sha512_ce_transform)
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index a6b1adf31c56..e62a094a9d52 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -26,11 +26,25 @@ MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("sha384");
MODULE_ALIAS_CRYPTO("sha512");
-asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
- int blocks);
+asmlinkage int sha512_ce_transform(struct sha512_state *sst, u8 const *src,
+ int blocks);
asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks);
+static void __sha512_ce_transform(struct sha512_state *sst, u8 const *src,
+ int blocks)
+{
+ while (blocks) {
+ int rem;
+
+ kernel_neon_begin();
+ rem = sha512_ce_transform(sst, src, blocks);
+ kernel_neon_end();
+ src += (blocks - rem) * SHA512_BLOCK_SIZE;
+ blocks = rem;
+ }
+}
+
static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src,
int blocks)
{
@@ -40,45 +54,30 @@ static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src,
static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- if (!crypto_simd_usable())
- return sha512_base_do_update(desc, data, len,
- __sha512_block_data_order);
-
- kernel_neon_begin();
- sha512_base_do_update(desc, data, len, sha512_ce_transform);
- kernel_neon_end();
+ sha512_block_fn *fn = crypto_simd_usable() ? __sha512_ce_transform
+ : __sha512_block_data_order;
+ sha512_base_do_update(desc, data, len, fn);
return 0;
}
static int sha512_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!crypto_simd_usable()) {
- if (len)
- sha512_base_do_update(desc, data, len,
- __sha512_block_data_order);
- sha512_base_do_finalize(desc, __sha512_block_data_order);
- return sha512_base_finish(desc, out);
- }
+ sha512_block_fn *fn = crypto_simd_usable() ? __sha512_ce_transform
+ : __sha512_block_data_order;
- kernel_neon_begin();
- sha512_base_do_update(desc, data, len, sha512_ce_transform);
- sha512_base_do_finalize(desc, sha512_ce_transform);
- kernel_neon_end();
+ sha512_base_do_update(desc, data, len, fn);
+ sha512_base_do_finalize(desc, fn);
return sha512_base_finish(desc, out);
}
static int sha512_ce_final(struct shash_desc *desc, u8 *out)
{
- if (!crypto_simd_usable()) {
- sha512_base_do_finalize(desc, __sha512_block_data_order);
- return sha512_base_finish(desc, out);
- }
+ sha512_block_fn *fn = crypto_simd_usable() ? __sha512_ce_transform
+ : __sha512_block_data_order;
- kernel_neon_begin();
- sha512_base_do_finalize(desc, sha512_ce_transform);
- kernel_neon_end();
+ sha512_base_do_finalize(desc, fn);
return sha512_base_finish(desc, out);
}
--
2.29.2
WARNING: multiple messages have this Message-ID (diff)
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: mark.rutland@arm.com, herbert@gondor.apana.org.au,
Eric Biggers <ebiggers@google.com>,
catalin.marinas@arm.com, Dave Martin <dave.martin@arm.com>,
will@kernel.org, Ard Biesheuvel <ardb@kernel.org>,
linux-arm-kernel@lists.infradead.org
Subject: [PATCH 5/9] crypto: arm64/sha512-ce - simplify NEON yield
Date: Thu, 28 Jan 2021 14:06:21 +0100 [thread overview]
Message-ID: <20210128130625.54076-6-ardb@kernel.org> (raw)
In-Reply-To: <20210128130625.54076-1-ardb@kernel.org>
Instead of calling into kernel_neon_end() and kernel_neon_begin() (and
potentially into schedule()) from the assembler code when running in
task mode and a reschedule is pending, perform only the preempt count
check in assembler, but simply return early in this case, and let the C
code deal with the consequences.
This reverts commit 6caf7adc5e458f77f550b6c6ca8effa152d61b4a.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/crypto/sha512-ce-core.S | 29 +++--------
arch/arm64/crypto/sha512-ce-glue.c | 53 ++++++++++----------
2 files changed, 34 insertions(+), 48 deletions(-)
diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S
index cde606c0323e..d6e7f6c95fa6 100644
--- a/arch/arm64/crypto/sha512-ce-core.S
+++ b/arch/arm64/crypto/sha512-ce-core.S
@@ -107,23 +107,17 @@
*/
.text
SYM_FUNC_START(sha512_ce_transform)
- frame_push 3
-
- mov x19, x0
- mov x20, x1
- mov x21, x2
-
/* load state */
-0: ld1 {v8.2d-v11.2d}, [x19]
+ ld1 {v8.2d-v11.2d}, [x0]
/* load first 4 round constants */
adr_l x3, .Lsha512_rcon
ld1 {v20.2d-v23.2d}, [x3], #64
/* load input */
-1: ld1 {v12.2d-v15.2d}, [x20], #64
- ld1 {v16.2d-v19.2d}, [x20], #64
- sub w21, w21, #1
+0: ld1 {v12.2d-v15.2d}, [x1], #64
+ ld1 {v16.2d-v19.2d}, [x1], #64
+ sub w2, w2, #1
CPU_LE( rev64 v12.16b, v12.16b )
CPU_LE( rev64 v13.16b, v13.16b )
@@ -201,19 +195,12 @@ CPU_LE( rev64 v19.16b, v19.16b )
add v10.2d, v10.2d, v2.2d
add v11.2d, v11.2d, v3.2d
+ cond_yield 3f, x4
/* handled all input blocks? */
- cbz w21, 3f
-
- if_will_cond_yield_neon
- st1 {v8.2d-v11.2d}, [x19]
- do_cond_yield_neon
- b 0b
- endif_yield_neon
-
- b 1b
+ cbnz w2, 0b
/* store new state */
-3: st1 {v8.2d-v11.2d}, [x19]
- frame_pop
+3: st1 {v8.2d-v11.2d}, [x0]
+ mov w0, w2
ret
SYM_FUNC_END(sha512_ce_transform)
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index a6b1adf31c56..e62a094a9d52 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -26,11 +26,25 @@ MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("sha384");
MODULE_ALIAS_CRYPTO("sha512");
-asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
- int blocks);
+asmlinkage int sha512_ce_transform(struct sha512_state *sst, u8 const *src,
+ int blocks);
asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks);
+static void __sha512_ce_transform(struct sha512_state *sst, u8 const *src,
+ int blocks)
+{
+ while (blocks) {
+ int rem;
+
+ kernel_neon_begin();
+ rem = sha512_ce_transform(sst, src, blocks);
+ kernel_neon_end();
+ src += (blocks - rem) * SHA512_BLOCK_SIZE;
+ blocks = rem;
+ }
+}
+
static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src,
int blocks)
{
@@ -40,45 +54,30 @@ static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src,
static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- if (!crypto_simd_usable())
- return sha512_base_do_update(desc, data, len,
- __sha512_block_data_order);
-
- kernel_neon_begin();
- sha512_base_do_update(desc, data, len, sha512_ce_transform);
- kernel_neon_end();
+ sha512_block_fn *fn = crypto_simd_usable() ? __sha512_ce_transform
+ : __sha512_block_data_order;
+ sha512_base_do_update(desc, data, len, fn);
return 0;
}
static int sha512_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!crypto_simd_usable()) {
- if (len)
- sha512_base_do_update(desc, data, len,
- __sha512_block_data_order);
- sha512_base_do_finalize(desc, __sha512_block_data_order);
- return sha512_base_finish(desc, out);
- }
+ sha512_block_fn *fn = crypto_simd_usable() ? __sha512_ce_transform
+ : __sha512_block_data_order;
- kernel_neon_begin();
- sha512_base_do_update(desc, data, len, sha512_ce_transform);
- sha512_base_do_finalize(desc, sha512_ce_transform);
- kernel_neon_end();
+ sha512_base_do_update(desc, data, len, fn);
+ sha512_base_do_finalize(desc, fn);
return sha512_base_finish(desc, out);
}
static int sha512_ce_final(struct shash_desc *desc, u8 *out)
{
- if (!crypto_simd_usable()) {
- sha512_base_do_finalize(desc, __sha512_block_data_order);
- return sha512_base_finish(desc, out);
- }
+ sha512_block_fn *fn = crypto_simd_usable() ? __sha512_ce_transform
+ : __sha512_block_data_order;
- kernel_neon_begin();
- sha512_base_do_finalize(desc, sha512_ce_transform);
- kernel_neon_end();
+ sha512_base_do_finalize(desc, fn);
return sha512_base_finish(desc, out);
}
--
2.29.2
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
next prev parent reply other threads:[~2021-01-28 13:07 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-28 13:06 [PATCH 0/9] arm64: rework NEON yielding to avoid scheduling from asm code Ard Biesheuvel
2021-01-28 13:06 ` Ard Biesheuvel
2021-01-28 13:06 ` [PATCH 1/9] arm64: assembler: add cond_yield macro Ard Biesheuvel
2021-01-28 13:06 ` Ard Biesheuvel
2021-01-28 20:24 ` Will Deacon
2021-01-28 20:24 ` Will Deacon
2021-01-28 20:25 ` Will Deacon
2021-01-28 20:25 ` Will Deacon
2021-01-28 20:26 ` Ard Biesheuvel
2021-01-28 20:26 ` Ard Biesheuvel
2021-01-28 13:06 ` [PATCH 2/9] crypto: arm64/sha1-ce - simplify NEON yield Ard Biesheuvel
2021-01-28 13:06 ` Ard Biesheuvel
2021-01-28 13:06 ` [PATCH 3/9] crypto: arm64/sha2-ce " Ard Biesheuvel
2021-01-28 13:06 ` Ard Biesheuvel
2021-01-28 13:06 ` [PATCH 4/9] crypto: arm64/sha3-ce " Ard Biesheuvel
2021-01-28 13:06 ` Ard Biesheuvel
2021-01-28 13:06 ` Ard Biesheuvel [this message]
2021-01-28 13:06 ` [PATCH 5/9] crypto: arm64/sha512-ce " Ard Biesheuvel
2021-01-28 13:06 ` [PATCH 6/9] crypto: arm64/aes-neonbs - remove NEON yield calls Ard Biesheuvel
2021-01-28 13:06 ` Ard Biesheuvel
2021-01-28 13:06 ` [PATCH 7/9] crypto: arm64/aes-ce-mac - simplify NEON yield Ard Biesheuvel
2021-01-28 13:06 ` Ard Biesheuvel
2021-01-28 13:06 ` [PATCH 8/9] crypto: arm64/crc-t10dif - move NEON yield to C code Ard Biesheuvel
2021-01-28 13:06 ` Ard Biesheuvel
2021-01-28 13:06 ` [PATCH 9/9] arm64: assembler: remove conditional NEON yield macros Ard Biesheuvel
2021-01-28 13:06 ` Ard Biesheuvel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210128130625.54076-6-ardb@kernel.org \
--to=ardb@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=dave.martin@arm.com \
--cc=ebiggers@google.com \
--cc=herbert@gondor.apana.org.au \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-crypto@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.