From: ard.biesheuvel@linaro.org (Ard Biesheuvel)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH resend 09/10] crypto: arm64/sha3-ce - yield NEON after every block of input
Date: Mon, 30 Apr 2018 18:18:29 +0200 [thread overview]
Message-ID: <20180430161830.14892-10-ard.biesheuvel@linaro.org> (raw)
In-Reply-To: <20180430161830.14892-1-ard.biesheuvel@linaro.org>
Avoid excessive scheduling delays under a preemptible kernel by
conditionally yielding the NEON after every block of input.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/sha3-ce-core.S | 77 +++++++++++++-------
1 file changed, 50 insertions(+), 27 deletions(-)
diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S
index 332ad7530690..a7d587fa54f6 100644
--- a/arch/arm64/crypto/sha3-ce-core.S
+++ b/arch/arm64/crypto/sha3-ce-core.S
@@ -41,9 +41,16 @@
*/
.text
ENTRY(sha3_ce_transform)
- /* load state */
- add x8, x0, #32
- ld1 { v0.1d- v3.1d}, [x0]
+ frame_push 4
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+
+0: /* load state */
+ add x8, x19, #32
+ ld1 { v0.1d- v3.1d}, [x19]
ld1 { v4.1d- v7.1d}, [x8], #32
ld1 { v8.1d-v11.1d}, [x8], #32
ld1 {v12.1d-v15.1d}, [x8], #32
@@ -51,13 +58,13 @@ ENTRY(sha3_ce_transform)
ld1 {v20.1d-v23.1d}, [x8], #32
ld1 {v24.1d}, [x8]
-0: sub w2, w2, #1
+1: sub w21, w21, #1
mov w8, #24
adr_l x9, .Lsha3_rcon
/* load input */
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b-v31.8b}, [x1], #24
+ ld1 {v25.8b-v28.8b}, [x20], #32
+ ld1 {v29.8b-v31.8b}, [x20], #24
eor v0.8b, v0.8b, v25.8b
eor v1.8b, v1.8b, v26.8b
eor v2.8b, v2.8b, v27.8b
@@ -66,10 +73,10 @@ ENTRY(sha3_ce_transform)
eor v5.8b, v5.8b, v30.8b
eor v6.8b, v6.8b, v31.8b
- tbnz x3, #6, 2f // SHA3-512
+ tbnz x22, #6, 3f // SHA3-512
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b-v30.8b}, [x1], #16
+ ld1 {v25.8b-v28.8b}, [x20], #32
+ ld1 {v29.8b-v30.8b}, [x20], #16
eor v7.8b, v7.8b, v25.8b
eor v8.8b, v8.8b, v26.8b
eor v9.8b, v9.8b, v27.8b
@@ -77,34 +84,34 @@ ENTRY(sha3_ce_transform)
eor v11.8b, v11.8b, v29.8b
eor v12.8b, v12.8b, v30.8b
- tbnz x3, #4, 1f // SHA3-384 or SHA3-224
+ tbnz x22, #4, 2f // SHA3-384 or SHA3-224
// SHA3-256
- ld1 {v25.8b-v28.8b}, [x1], #32
+ ld1 {v25.8b-v28.8b}, [x20], #32
eor v13.8b, v13.8b, v25.8b
eor v14.8b, v14.8b, v26.8b
eor v15.8b, v15.8b, v27.8b
eor v16.8b, v16.8b, v28.8b
- b 3f
+ b 4f
-1: tbz x3, #2, 3f // bit 2 cleared? SHA-384
+2: tbz x22, #2, 4f // bit 2 cleared? SHA-384
// SHA3-224
- ld1 {v25.8b-v28.8b}, [x1], #32
- ld1 {v29.8b}, [x1], #8
+ ld1 {v25.8b-v28.8b}, [x20], #32
+ ld1 {v29.8b}, [x20], #8
eor v13.8b, v13.8b, v25.8b
eor v14.8b, v14.8b, v26.8b
eor v15.8b, v15.8b, v27.8b
eor v16.8b, v16.8b, v28.8b
eor v17.8b, v17.8b, v29.8b
- b 3f
+ b 4f
// SHA3-512
-2: ld1 {v25.8b-v26.8b}, [x1], #16
+3: ld1 {v25.8b-v26.8b}, [x20], #16
eor v7.8b, v7.8b, v25.8b
eor v8.8b, v8.8b, v26.8b
-3: sub w8, w8, #1
+4: sub w8, w8, #1
eor3 v29.16b, v4.16b, v9.16b, v14.16b
eor3 v26.16b, v1.16b, v6.16b, v11.16b
@@ -183,17 +190,33 @@ ENTRY(sha3_ce_transform)
eor v0.16b, v0.16b, v31.16b
- cbnz w8, 3b
- cbnz w2, 0b
+ cbnz w8, 4b
+ cbz w21, 5f
+
+ if_will_cond_yield_neon
+ add x8, x19, #32
+ st1 { v0.1d- v3.1d}, [x19]
+ st1 { v4.1d- v7.1d}, [x8], #32
+ st1 { v8.1d-v11.1d}, [x8], #32
+ st1 {v12.1d-v15.1d}, [x8], #32
+ st1 {v16.1d-v19.1d}, [x8], #32
+ st1 {v20.1d-v23.1d}, [x8], #32
+ st1 {v24.1d}, [x8]
+ do_cond_yield_neon
+ b 0b
+ endif_yield_neon
+
+ b 1b
/* save state */
- st1 { v0.1d- v3.1d}, [x0], #32
- st1 { v4.1d- v7.1d}, [x0], #32
- st1 { v8.1d-v11.1d}, [x0], #32
- st1 {v12.1d-v15.1d}, [x0], #32
- st1 {v16.1d-v19.1d}, [x0], #32
- st1 {v20.1d-v23.1d}, [x0], #32
- st1 {v24.1d}, [x0]
+5: st1 { v0.1d- v3.1d}, [x19], #32
+ st1 { v4.1d- v7.1d}, [x19], #32
+ st1 { v8.1d-v11.1d}, [x19], #32
+ st1 {v12.1d-v15.1d}, [x19], #32
+ st1 {v16.1d-v19.1d}, [x19], #32
+ st1 {v20.1d-v23.1d}, [x19], #32
+ st1 {v24.1d}, [x19]
+ frame_pop
ret
ENDPROC(sha3_ce_transform)
--
2.17.0
next prev parent reply other threads:[~2018-04-30 16:18 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-30 16:18 [PATCH resend 00/10] crypto: arm64 - play nice with CONFIG_PREEMPT Ard Biesheuvel
2018-04-30 16:18 ` [PATCH resend 01/10] crypto: arm64/sha1-ce - yield NEON after every block of input Ard Biesheuvel
2018-04-30 16:18 ` [PATCH resend 02/10] crypto: arm64/sha2-ce " Ard Biesheuvel
2018-04-30 16:18 ` [PATCH resend 03/10] crypto: arm64/aes-ccm " Ard Biesheuvel
2018-04-30 16:18 ` [PATCH resend 04/10] crypto: arm64/aes-blk " Ard Biesheuvel
2018-04-30 16:18 ` [PATCH resend 05/10] crypto: arm64/aes-bs " Ard Biesheuvel
2018-04-30 16:18 ` [PATCH resend 06/10] crypto: arm64/aes-ghash " Ard Biesheuvel
2018-04-30 16:18 ` [PATCH resend 07/10] crypto: arm64/crc32-ce " Ard Biesheuvel
2018-04-30 16:18 ` [PATCH resend 08/10] crypto: arm64/crct10dif-ce " Ard Biesheuvel
2018-04-30 16:18 ` Ard Biesheuvel [this message]
2018-04-30 16:18 ` [PATCH resend 10/10] crypto: arm64/sha512-ce " Ard Biesheuvel
2018-05-11 16:18 ` [PATCH resend 00/10] crypto: arm64 - play nice with CONFIG_PREEMPT Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180430161830.14892-10-ard.biesheuvel@linaro.org \
--to=ard.biesheuvel@linaro.org \
--cc=linux-arm-kernel@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).