From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org, will@kernel.org,
mark.rutland@arm.com, catalin.marinas@arm.com,
herbert@gondor.apana.org.au, Ard Biesheuvel <ardb@kernel.org>,
Dave Martin <dave.martin@arm.com>,
Eric Biggers <ebiggers@google.com>
Subject: [PATCH v2 4/9] crypto: arm64/sha3-ce - simplify NEON yield
Date: Wed, 3 Feb 2021 12:36:21 +0100 [thread overview]
Message-ID: <20210203113626.220151-5-ardb@kernel.org> (raw)
In-Reply-To: <20210203113626.220151-1-ardb@kernel.org>
Instead of calling into kernel_neon_end() and kernel_neon_begin() (and
potentially into schedule()) from the assembler code when running in
task mode and a reschedule is pending, perform only the preempt count
check in assembler, but simply return early in this case, and let the C
code deal with the consequences.
This reverts commit 7edc86cb1c18b4c274672232117586ea2bef1d9a.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/crypto/sha3-ce-core.S | 81 ++++++++------------
arch/arm64/crypto/sha3-ce-glue.c | 14 ++--
2 files changed, 39 insertions(+), 56 deletions(-)
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
index 1cfb768df350..6f5208414fe3 100644
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -37,20 +37,13 @@
.endm
/*
- * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
+ * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
*/
.text
SYM_FUNC_START(sha3_ce_transform)
- frame_push 4
-
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
-
-0: /* load state */
- add x8, x19, #32
- ld1 { v0.1d- v3.1d}, [x19]
+ /* load state */
+ add x8, x0, #32
+ ld1 { v0.1d- v3.1d}, [x0]
ld1 { v4.1d- v7.1d}, [x8], #32
ld1 { v8.1d-v11.1d}, [x8], #32
ld1 {v12.1d-v15.1d}, [x8], #32
@@ -58,13 +51,13 @@ SYM_FUNC_START(sha3_ce_transform)
ld1 {v20.1d-v23.1d}, [x8], #32
ld1 {v24.1d}, [x8]
-1: sub w21, w21, #1
+0: sub w2, w2, #1
mov w8, #24
adr_l x9, .Lsha3_rcon
/* load input */
- ld1 {v25.8b-v28.8b}, [x20], #32
- ld1 {v29.8b-v31.8b}, [x20], #24
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ ld1 {v29.8b-v31.8b}, [x1], #24
eor v0.8b, v0.8b, v25.8b
eor v1.8b, v1.8b, v26.8b
eor v2.8b, v2.8b, v27.8b
@@ -73,10 +66,10 @@ SYM_FUNC_START(sha3_ce_transform)
eor v5.8b, v5.8b, v30.8b
eor v6.8b, v6.8b, v31.8b
- tbnz x22, #6, 3f // SHA3-512
+ tbnz x3, #6, 2f // SHA3-512
- ld1 {v25.8b-v28.8b}, [x20], #32
- ld1 {v29.8b-v30.8b}, [x20], #16
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ ld1 {v29.8b-v30.8b}, [x1], #16
eor v7.8b, v7.8b, v25.8b
eor v8.8b, v8.8b, v26.8b
eor v9.8b, v9.8b, v27.8b
@@ -84,34 +77,34 @@ SYM_FUNC_START(sha3_ce_transform)
eor v11.8b, v11.8b, v29.8b
eor v12.8b, v12.8b, v30.8b
- tbnz x22, #4, 2f // SHA3-384 or SHA3-224
+ tbnz x3, #4, 1f // SHA3-384 or SHA3-224
// SHA3-256
- ld1 {v25.8b-v28.8b}, [x20], #32
+ ld1 {v25.8b-v28.8b}, [x1], #32
eor v13.8b, v13.8b, v25.8b
eor v14.8b, v14.8b, v26.8b
eor v15.8b, v15.8b, v27.8b
eor v16.8b, v16.8b, v28.8b
- b 4f
+ b 3f
-2: tbz x22, #2, 4f // bit 2 cleared? SHA-384
+1: tbz x3, #2, 3f // bit 2 cleared? SHA-384
// SHA3-224
- ld1 {v25.8b-v28.8b}, [x20], #32
- ld1 {v29.8b}, [x20], #8
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ ld1 {v29.8b}, [x1], #8
eor v13.8b, v13.8b, v25.8b
eor v14.8b, v14.8b, v26.8b
eor v15.8b, v15.8b, v27.8b
eor v16.8b, v16.8b, v28.8b
eor v17.8b, v17.8b, v29.8b
- b 4f
+ b 3f
// SHA3-512
-3: ld1 {v25.8b-v26.8b}, [x20], #16
+2: ld1 {v25.8b-v26.8b}, [x1], #16
eor v7.8b, v7.8b, v25.8b
eor v8.8b, v8.8b, v26.8b
-4: sub w8, w8, #1
+3: sub w8, w8, #1
eor3 v29.16b, v4.16b, v9.16b, v14.16b
eor3 v26.16b, v1.16b, v6.16b, v11.16b
@@ -190,33 +183,19 @@ SYM_FUNC_START(sha3_ce_transform)
eor v0.16b, v0.16b, v31.16b
- cbnz w8, 4b
- cbz w21, 5f
-
- if_will_cond_yield_neon
- add x8, x19, #32
- st1 { v0.1d- v3.1d}, [x19]
- st1 { v4.1d- v7.1d}, [x8], #32
- st1 { v8.1d-v11.1d}, [x8], #32
- st1 {v12.1d-v15.1d}, [x8], #32
- st1 {v16.1d-v19.1d}, [x8], #32
- st1 {v20.1d-v23.1d}, [x8], #32
- st1 {v24.1d}, [x8]
- do_cond_yield_neon
- b 0b
- endif_yield_neon
-
- b 1b
+ cbnz w8, 3b
+ cond_yield 3f, x8
+ cbnz w2, 0b
/* save state */
-5: st1 { v0.1d- v3.1d}, [x19], #32
- st1 { v4.1d- v7.1d}, [x19], #32
- st1 { v8.1d-v11.1d}, [x19], #32
- st1 {v12.1d-v15.1d}, [x19], #32
- st1 {v16.1d-v19.1d}, [x19], #32
- st1 {v20.1d-v23.1d}, [x19], #32
- st1 {v24.1d}, [x19]
- frame_pop
+3: st1 { v0.1d- v3.1d}, [x0], #32
+ st1 { v4.1d- v7.1d}, [x0], #32
+ st1 { v8.1d-v11.1d}, [x0], #32
+ st1 {v12.1d-v15.1d}, [x0], #32
+ st1 {v16.1d-v19.1d}, [x0], #32
+ st1 {v20.1d-v23.1d}, [x0], #32
+ st1 {v24.1d}, [x0]
+ mov w0, w2
ret
SYM_FUNC_END(sha3_ce_transform)
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index 7288d3046354..8c65cecf560a 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -28,8 +28,8 @@ MODULE_ALIAS_CRYPTO("sha3-256");
MODULE_ALIAS_CRYPTO("sha3-384");
MODULE_ALIAS_CRYPTO("sha3-512");
-asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks,
- int md_len);
+asmlinkage int sha3_ce_transform(u64 *st, const u8 *data, int blocks,
+ int md_len);
static int sha3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
@@ -59,11 +59,15 @@ static int sha3_update(struct shash_desc *desc, const u8 *data,
blocks = len / sctx->rsiz;
len %= sctx->rsiz;
- if (blocks) {
+ while (blocks) {
+ int rem;
+
kernel_neon_begin();
- sha3_ce_transform(sctx->st, data, blocks, digest_size);
+ rem = sha3_ce_transform(sctx->st, data, blocks,
+ digest_size);
kernel_neon_end();
- data += blocks * sctx->rsiz;
+ data += (blocks - rem) * sctx->rsiz;
+ blocks = rem;
}
}
--
2.30.0
WARNING: multiple messages have this Message-ID (diff)
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: mark.rutland@arm.com, herbert@gondor.apana.org.au,
Eric Biggers <ebiggers@google.com>,
catalin.marinas@arm.com, Dave Martin <dave.martin@arm.com>,
will@kernel.org, Ard Biesheuvel <ardb@kernel.org>,
linux-arm-kernel@lists.infradead.org
Subject: [PATCH v2 4/9] crypto: arm64/sha3-ce - simplify NEON yield
Date: Wed, 3 Feb 2021 12:36:21 +0100 [thread overview]
Message-ID: <20210203113626.220151-5-ardb@kernel.org> (raw)
In-Reply-To: <20210203113626.220151-1-ardb@kernel.org>
Instead of calling into kernel_neon_end() and kernel_neon_begin() (and
potentially into schedule()) from the assembler code when running in
task mode and a reschedule is pending, perform only the preempt count
check in assembler, but simply return early in this case, and let the C
code deal with the consequences.
This reverts commit 7edc86cb1c18b4c274672232117586ea2bef1d9a.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/crypto/sha3-ce-core.S | 81 ++++++++------------
arch/arm64/crypto/sha3-ce-glue.c | 14 ++--
2 files changed, 39 insertions(+), 56 deletions(-)
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
index 1cfb768df350..6f5208414fe3 100644
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -37,20 +37,13 @@
.endm
/*
- * sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
+ * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
*/
.text
SYM_FUNC_START(sha3_ce_transform)
- frame_push 4
-
- mov x19, x0
- mov x20, x1
- mov x21, x2
- mov x22, x3
-
-0: /* load state */
- add x8, x19, #32
- ld1 { v0.1d- v3.1d}, [x19]
+ /* load state */
+ add x8, x0, #32
+ ld1 { v0.1d- v3.1d}, [x0]
ld1 { v4.1d- v7.1d}, [x8], #32
ld1 { v8.1d-v11.1d}, [x8], #32
ld1 {v12.1d-v15.1d}, [x8], #32
@@ -58,13 +51,13 @@ SYM_FUNC_START(sha3_ce_transform)
ld1 {v20.1d-v23.1d}, [x8], #32
ld1 {v24.1d}, [x8]
-1: sub w21, w21, #1
+0: sub w2, w2, #1
mov w8, #24
adr_l x9, .Lsha3_rcon
/* load input */
- ld1 {v25.8b-v28.8b}, [x20], #32
- ld1 {v29.8b-v31.8b}, [x20], #24
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ ld1 {v29.8b-v31.8b}, [x1], #24
eor v0.8b, v0.8b, v25.8b
eor v1.8b, v1.8b, v26.8b
eor v2.8b, v2.8b, v27.8b
@@ -73,10 +66,10 @@ SYM_FUNC_START(sha3_ce_transform)
eor v5.8b, v5.8b, v30.8b
eor v6.8b, v6.8b, v31.8b
- tbnz x22, #6, 3f // SHA3-512
+ tbnz x3, #6, 2f // SHA3-512
- ld1 {v25.8b-v28.8b}, [x20], #32
- ld1 {v29.8b-v30.8b}, [x20], #16
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ ld1 {v29.8b-v30.8b}, [x1], #16
eor v7.8b, v7.8b, v25.8b
eor v8.8b, v8.8b, v26.8b
eor v9.8b, v9.8b, v27.8b
@@ -84,34 +77,34 @@ SYM_FUNC_START(sha3_ce_transform)
eor v11.8b, v11.8b, v29.8b
eor v12.8b, v12.8b, v30.8b
- tbnz x22, #4, 2f // SHA3-384 or SHA3-224
+ tbnz x3, #4, 1f // SHA3-384 or SHA3-224
// SHA3-256
- ld1 {v25.8b-v28.8b}, [x20], #32
+ ld1 {v25.8b-v28.8b}, [x1], #32
eor v13.8b, v13.8b, v25.8b
eor v14.8b, v14.8b, v26.8b
eor v15.8b, v15.8b, v27.8b
eor v16.8b, v16.8b, v28.8b
- b 4f
+ b 3f
-2: tbz x22, #2, 4f // bit 2 cleared? SHA-384
+1: tbz x3, #2, 3f // bit 2 cleared? SHA-384
// SHA3-224
- ld1 {v25.8b-v28.8b}, [x20], #32
- ld1 {v29.8b}, [x20], #8
+ ld1 {v25.8b-v28.8b}, [x1], #32
+ ld1 {v29.8b}, [x1], #8
eor v13.8b, v13.8b, v25.8b
eor v14.8b, v14.8b, v26.8b
eor v15.8b, v15.8b, v27.8b
eor v16.8b, v16.8b, v28.8b
eor v17.8b, v17.8b, v29.8b
- b 4f
+ b 3f
// SHA3-512
-3: ld1 {v25.8b-v26.8b}, [x20], #16
+2: ld1 {v25.8b-v26.8b}, [x1], #16
eor v7.8b, v7.8b, v25.8b
eor v8.8b, v8.8b, v26.8b
-4: sub w8, w8, #1
+3: sub w8, w8, #1
eor3 v29.16b, v4.16b, v9.16b, v14.16b
eor3 v26.16b, v1.16b, v6.16b, v11.16b
@@ -190,33 +183,19 @@ SYM_FUNC_START(sha3_ce_transform)
eor v0.16b, v0.16b, v31.16b
- cbnz w8, 4b
- cbz w21, 5f
-
- if_will_cond_yield_neon
- add x8, x19, #32
- st1 { v0.1d- v3.1d}, [x19]
- st1 { v4.1d- v7.1d}, [x8], #32
- st1 { v8.1d-v11.1d}, [x8], #32
- st1 {v12.1d-v15.1d}, [x8], #32
- st1 {v16.1d-v19.1d}, [x8], #32
- st1 {v20.1d-v23.1d}, [x8], #32
- st1 {v24.1d}, [x8]
- do_cond_yield_neon
- b 0b
- endif_yield_neon
-
- b 1b
+ cbnz w8, 3b
+ cond_yield 3f, x8
+ cbnz w2, 0b
/* save state */
-5: st1 { v0.1d- v3.1d}, [x19], #32
- st1 { v4.1d- v7.1d}, [x19], #32
- st1 { v8.1d-v11.1d}, [x19], #32
- st1 {v12.1d-v15.1d}, [x19], #32
- st1 {v16.1d-v19.1d}, [x19], #32
- st1 {v20.1d-v23.1d}, [x19], #32
- st1 {v24.1d}, [x19]
- frame_pop
+3: st1 { v0.1d- v3.1d}, [x0], #32
+ st1 { v4.1d- v7.1d}, [x0], #32
+ st1 { v8.1d-v11.1d}, [x0], #32
+ st1 {v12.1d-v15.1d}, [x0], #32
+ st1 {v16.1d-v19.1d}, [x0], #32
+ st1 {v20.1d-v23.1d}, [x0], #32
+ st1 {v24.1d}, [x0]
+ mov w0, w2
ret
SYM_FUNC_END(sha3_ce_transform)
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index 7288d3046354..8c65cecf560a 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -28,8 +28,8 @@ MODULE_ALIAS_CRYPTO("sha3-256");
MODULE_ALIAS_CRYPTO("sha3-384");
MODULE_ALIAS_CRYPTO("sha3-512");
-asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks,
- int md_len);
+asmlinkage int sha3_ce_transform(u64 *st, const u8 *data, int blocks,
+ int md_len);
static int sha3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
@@ -59,11 +59,15 @@ static int sha3_update(struct shash_desc *desc, const u8 *data,
blocks = len / sctx->rsiz;
len %= sctx->rsiz;
- if (blocks) {
+ while (blocks) {
+ int rem;
+
kernel_neon_begin();
- sha3_ce_transform(sctx->st, data, blocks, digest_size);
+ rem = sha3_ce_transform(sctx->st, data, blocks,
+ digest_size);
kernel_neon_end();
- data += blocks * sctx->rsiz;
+ data += (blocks - rem) * sctx->rsiz;
+ blocks = rem;
}
}
--
2.30.0
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
next prev parent reply other threads:[~2021-02-03 11:37 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-03 11:36 [PATCH v2 0/9] arm64: rework NEON yielding to avoid scheduling from asm code Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 1/9] arm64: assembler: add cond_yield macro Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 2/9] crypto: arm64/sha1-ce - simplify NEON yield Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 3/9] crypto: arm64/sha2-ce " Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel [this message]
2021-02-03 11:36 ` [PATCH v2 4/9] crypto: arm64/sha3-ce " Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 5/9] crypto: arm64/sha512-ce " Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 6/9] crypto: arm64/aes-neonbs - remove NEON yield calls Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 7/9] crypto: arm64/aes-ce-mac - simplify NEON yield Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 8/9] crypto: arm64/crc-t10dif - move NEON yield to C code Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel
2021-02-03 11:36 ` [PATCH v2 9/9] arm64: assembler: remove conditional NEON yield macros Ard Biesheuvel
2021-02-03 11:36 ` Ard Biesheuvel
2021-02-03 21:31 ` (subset) Re: [PATCH v2 0/9] arm64: rework NEON yielding to avoid scheduling from asm code Will Deacon
2021-02-03 21:31 ` Will Deacon
2021-02-04 2:44 ` Herbert Xu
2021-02-04 2:44 ` Herbert Xu
2021-02-04 8:29 ` Ard Biesheuvel
2021-02-04 8:29 ` Ard Biesheuvel
2021-02-04 11:10 ` Herbert Xu
2021-02-04 11:10 ` Herbert Xu
2021-02-04 13:03 ` Will Deacon
2021-02-04 13:03 ` Will Deacon
2021-02-04 19:45 ` Herbert Xu
2021-02-04 19:45 ` Herbert Xu
2021-02-04 10:33 ` Will Deacon
2021-02-04 10:33 ` Will Deacon
2021-02-10 7:23 ` Herbert Xu
2021-02-10 7:23 ` Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210203113626.220151-5-ardb@kernel.org \
--to=ardb@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=dave.martin@arm.com \
--cc=ebiggers@google.com \
--cc=herbert@gondor.apana.org.au \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-crypto@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.