From: ard.biesheuvel@linaro.org (Ard Biesheuvel)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v3 18/20] crypto: arm64/crc32-ce - yield NEON after every block of input
Date: Wed, 6 Dec 2017 19:43:44 +0000 [thread overview]
Message-ID: <20171206194346.24393-19-ard.biesheuvel@linaro.org> (raw)
In-Reply-To: <20171206194346.24393-1-ard.biesheuvel@linaro.org>
Avoid excessive scheduling delays under a preemptible kernel by
yielding the NEON after every block of input.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/crc32-ce-core.S | 44 ++++++++++++++------
1 file changed, 32 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S
index 18f5a8442276..b4ddbb2027e5 100644
--- a/arch/arm64/crypto/crc32-ce-core.S
+++ b/arch/arm64/crypto/crc32-ce-core.S
@@ -100,9 +100,9 @@
dCONSTANT .req d0
qCONSTANT .req q0
- BUF .req x0
- LEN .req x1
- CRC .req x2
+ BUF .req x19
+ LEN .req x20
+ CRC .req x21
vzr .req v9
@@ -116,13 +116,21 @@
* size_t len, uint crc32)
*/
ENTRY(crc32_pmull_le)
- adr x3, .Lcrc32_constants
+ frame_push 4, 64
+
+ adr x22, .Lcrc32_constants
b 0f
ENTRY(crc32c_pmull_le)
- adr x3, .Lcrc32c_constants
+ frame_push 4, 64
+
+ adr x22, .Lcrc32c_constants
+
+0: mov BUF, x0
+ mov LEN, x1
+ mov CRC, x2
-0: bic LEN, LEN, #15
+ bic LEN, LEN, #15
ld1 {v1.16b-v4.16b}, [BUF], #0x40
movi vzr.16b, #0
fmov dCONSTANT, CRC
@@ -131,7 +139,7 @@ ENTRY(crc32c_pmull_le)
cmp LEN, #0x40
b.lt less_64
- ldr qCONSTANT, [x3]
+ ldr qCONSTANT, [x22]
loop_64: /* 64 bytes Full cache line folding */
sub LEN, LEN, #0x40
@@ -161,10 +169,21 @@ loop_64: /* 64 bytes Full cache line folding */
eor v4.16b, v4.16b, v8.16b
cmp LEN, #0x40
- b.ge loop_64
+ b.lt less_64
+
+ if_will_cond_yield_neon
+ stp q1, q2, [sp, #48]
+ stp q3, q4, [sp, #80]
+ do_cond_yield_neon
+ ldp q1, q2, [sp, #48]
+ ldp q3, q4, [sp, #80]
+ ldr qCONSTANT, [x22]
+ movi vzr.16b, #0
+ endif_yield_neon
+ b loop_64
less_64: /* Folding cache line into 128bit */
- ldr qCONSTANT, [x3, #16]
+ ldr qCONSTANT, [x22, #16]
pmull2 v5.1q, v1.2d, vCONSTANT.2d
pmull v1.1q, v1.1d, vCONSTANT.1d
@@ -203,8 +222,8 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
/* final 32-bit fold */
- ldr dCONSTANT, [x3, #32]
- ldr d3, [x3, #40]
+ ldr dCONSTANT, [x22, #32]
+ ldr d3, [x22, #40]
ext v2.16b, v1.16b, vzr.16b, #4
and v1.16b, v1.16b, v3.16b
@@ -212,7 +231,7 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
- ldr qCONSTANT, [x3, #48]
+ ldr qCONSTANT, [x22, #48]
and v2.16b, v1.16b, v3.16b
ext v2.16b, vzr.16b, v2.16b, #8
@@ -222,6 +241,7 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
mov w0, v1.s[1]
+ frame_pop 4, 64
ret
ENDPROC(crc32_pmull_le)
ENDPROC(crc32c_pmull_le)
--
2.11.0
next prev parent reply other threads:[~2017-12-06 19:43 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-06 19:43 [PATCH v3 00/20] crypto: arm64 - play nice with CONFIG_PREEMPT Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 01/20] crypto: testmgr - add a new test case for CRC-T10DIF Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 02/20] crypto: arm64/aes-ce-ccm - move kernel mode neon en/disable into loop Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 03/20] crypto: arm64/aes-blk " Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 04/20] crypto: arm64/aes-bs " Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 05/20] crypto: arm64/chacha20 " Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 06/20] crypto: arm64/aes-blk - remove configurable interleave Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 07/20] crypto: arm64/aes-blk - add 4 way interleave to CBC encrypt path Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 08/20] crypto: arm64/aes-blk - add 4 way interleave to CBC-MAC " Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 09/20] crypto: arm64/sha256-neon - play nice with CONFIG_PREEMPT kernels Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 10/20] arm64: assembler: add utility macros to push/pop stack frames Ard Biesheuvel
2017-12-07 14:11 ` Dave Martin
2017-12-07 14:21 ` Ard Biesheuvel
2017-12-07 14:53 ` Dave Martin
2017-12-07 14:58 ` Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 11/20] arm64: assembler: add macros to conditionally yield the NEON under PREEMPT Ard Biesheuvel
2017-12-07 14:39 ` Dave Martin
2017-12-07 14:50 ` Ard Biesheuvel
2017-12-07 15:47 ` Ard Biesheuvel
2017-12-07 15:51 ` Ard Biesheuvel
2017-12-07 16:15 ` Dave Martin
2017-12-07 16:11 ` Dave Martin
2017-12-06 19:43 ` [PATCH v3 12/20] crypto: arm64/sha1-ce - yield NEON after every block of input Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 13/20] crypto: arm64/sha2-ce " Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 14/20] crypto: arm64/aes-ccm " Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 15/20] crypto: arm64/aes-blk " Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 16/20] crypto: arm64/aes-bs " Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 17/20] crypto: arm64/aes-ghash " Ard Biesheuvel
2017-12-06 19:43 ` Ard Biesheuvel [this message]
2017-12-06 19:43 ` [PATCH v3 19/20] crypto: arm64/crct10dif-ce " Ard Biesheuvel
2017-12-06 19:43 ` [PATCH v3 20/20] DO NOT MERGE Ard Biesheuvel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171206194346.24393-19-ard.biesheuvel@linaro.org \
--to=ard.biesheuvel@linaro.org \
--cc=linux-arm-kernel@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).