From: Eric Biggers <ebiggers@kernel.org>
To: linux-crypto@vger.kernel.org
Cc: x86@kernel.org, Ondrej Mosnacek <omosnace@redhat.com>,
linux-kernel@vger.kernel.org
Subject: [PATCH v2 03/10] crypto: x86/aegis128 - eliminate some indirect calls
Date: Wed, 16 Oct 2024 17:00:44 -0700 [thread overview]
Message-ID: <20241017000051.228294-4-ebiggers@kernel.org> (raw)
In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org>
From: Eric Biggers <ebiggers@google.com>
Instead of using a struct of function pointers to decide whether to call
the encryption or decryption assembly functions, use a conditional
branch on a bool. Force-inline the functions to avoid actually
generating the branch. This improves performance slightly since
indirect calls are slow. Remove the now-unnecessary CFI stubs.
Note that just force-inlining the existing functions might cause the
compiler to optimize out the indirect branches, but that would not be a
reliable way to do it and the CFI stubs would still be required.
Reviewed-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/aegis128-aesni-asm.S | 9 ++--
arch/x86/crypto/aegis128-aesni-glue.c | 74 +++++++++++++--------------
2 files changed, 40 insertions(+), 43 deletions(-)
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 2de859173940e..1b57558548c78 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -5,11 +5,10 @@
* Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
* Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
*/
#include <linux/linkage.h>
-#include <linux/cfi_types.h>
#include <asm/frame.h>
#define STATE0 %xmm0
#define STATE1 %xmm1
#define STATE2 %xmm2
@@ -401,11 +400,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_ad)
/*
* void crypto_aegis128_aesni_enc(void *state, unsigned int length,
* const void *src, void *dst);
*/
-SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
+SYM_FUNC_START(crypto_aegis128_aesni_enc)
FRAME_BEGIN
cmp $0x10, LEN
jb .Lenc_out
@@ -498,11 +497,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc)
/*
* void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
-SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
+SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
FRAME_BEGIN
/* load the state: */
movdqu 0x00(STATEP), STATE0
movdqu 0x10(STATEP), STATE1
@@ -555,11 +554,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
/*
* void crypto_aegis128_aesni_dec(void *state, unsigned int length,
* const void *src, void *dst);
*/
-SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
+SYM_FUNC_START(crypto_aegis128_aesni_dec)
FRAME_BEGIN
cmp $0x10, LEN
jb .Ldec_out
@@ -652,11 +651,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_dec)
/*
* void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
-SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
+SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
FRAME_BEGIN
/* load the state: */
movdqu 0x00(STATEP), STATE0
movdqu 0x10(STATEP), STATE1
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 96586470154e0..deb39cef0be1a 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -54,20 +54,10 @@ struct aegis_state {
struct aegis_ctx {
struct aegis_block key;
};
-struct aegis_crypt_ops {
- int (*skcipher_walk_init)(struct skcipher_walk *walk,
- struct aead_request *req, bool atomic);
-
- void (*crypt_blocks)(void *state, unsigned int length, const void *src,
- void *dst);
- void (*crypt_tail)(void *state, unsigned int length, const void *src,
- void *dst);
-};
-
static void crypto_aegis128_aesni_process_ad(
struct aegis_state *state, struct scatterlist *sg_src,
unsigned int assoclen)
{
struct scatter_walk walk;
@@ -112,24 +102,41 @@ static void crypto_aegis128_aesni_process_ad(
memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos);
crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes);
}
}
-static void crypto_aegis128_aesni_process_crypt(
- struct aegis_state *state, struct skcipher_walk *walk,
- const struct aegis_crypt_ops *ops)
+static __always_inline void
+crypto_aegis128_aesni_process_crypt(struct aegis_state *state,
+ struct skcipher_walk *walk, bool enc)
{
while (walk->nbytes >= AEGIS128_BLOCK_SIZE) {
- ops->crypt_blocks(state,
- round_down(walk->nbytes, AEGIS128_BLOCK_SIZE),
- walk->src.virt.addr, walk->dst.virt.addr);
+ if (enc)
+ crypto_aegis128_aesni_enc(
+ state,
+ round_down(walk->nbytes,
+ AEGIS128_BLOCK_SIZE),
+ walk->src.virt.addr,
+ walk->dst.virt.addr);
+ else
+ crypto_aegis128_aesni_dec(
+ state,
+ round_down(walk->nbytes,
+ AEGIS128_BLOCK_SIZE),
+ walk->src.virt.addr,
+ walk->dst.virt.addr);
skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE);
}
if (walk->nbytes) {
- ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
- walk->dst.virt.addr);
+ if (enc)
+ crypto_aegis128_aesni_enc_tail(state, walk->nbytes,
+ walk->src.virt.addr,
+ walk->dst.virt.addr);
+ else
+ crypto_aegis128_aesni_dec_tail(state, walk->nbytes,
+ walk->src.virt.addr,
+ walk->dst.virt.addr);
skcipher_walk_done(walk, 0);
}
}
static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead)
@@ -160,71 +167,62 @@ static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm,
if (authsize < AEGIS128_MIN_AUTH_SIZE)
return -EINVAL;
return 0;
}
-static void crypto_aegis128_aesni_crypt(struct aead_request *req,
- struct aegis_block *tag_xor,
- unsigned int cryptlen,
- const struct aegis_crypt_ops *ops)
+static __always_inline void
+crypto_aegis128_aesni_crypt(struct aead_request *req,
+ struct aegis_block *tag_xor,
+ unsigned int cryptlen, bool enc)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm);
struct skcipher_walk walk;
struct aegis_state state;
- ops->skcipher_walk_init(&walk, req, true);
+ if (enc)
+ skcipher_walk_aead_encrypt(&walk, req, true);
+ else
+ skcipher_walk_aead_decrypt(&walk, req, true);
kernel_fpu_begin();
crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv);
crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen);
- crypto_aegis128_aesni_process_crypt(&state, &walk, ops);
+ crypto_aegis128_aesni_process_crypt(&state, &walk, enc);
crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
kernel_fpu_end();
}
static int crypto_aegis128_aesni_encrypt(struct aead_request *req)
{
- static const struct aegis_crypt_ops OPS = {
- .skcipher_walk_init = skcipher_walk_aead_encrypt,
- .crypt_blocks = crypto_aegis128_aesni_enc,
- .crypt_tail = crypto_aegis128_aesni_enc_tail,
- };
-
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag = {};
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen;
- crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
+ crypto_aegis128_aesni_crypt(req, &tag, cryptlen, true);
scatterwalk_map_and_copy(tag.bytes, req->dst,
req->assoclen + cryptlen, authsize, 1);
return 0;
}
static int crypto_aegis128_aesni_decrypt(struct aead_request *req)
{
static const struct aegis_block zeros = {};
- static const struct aegis_crypt_ops OPS = {
- .skcipher_walk_init = skcipher_walk_aead_decrypt,
- .crypt_blocks = crypto_aegis128_aesni_dec,
- .crypt_tail = crypto_aegis128_aesni_dec_tail,
- };
-
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aegis_block tag;
unsigned int authsize = crypto_aead_authsize(tfm);
unsigned int cryptlen = req->cryptlen - authsize;
scatterwalk_map_and_copy(tag.bytes, req->src,
req->assoclen + cryptlen, authsize, 0);
- crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS);
+ crypto_aegis128_aesni_crypt(req, &tag, cryptlen, false);
return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
}
static struct aead_alg crypto_aegis128_aesni_alg = {
--
2.47.0
next prev parent reply other threads:[~2024-10-17 0:02 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-17 0:00 [PATCH v2 00/10] AEGIS x86 assembly tuning Eric Biggers
2024-10-17 0:00 ` [PATCH v2 01/10] crypto: x86/aegis128 - access 32-bit arguments as 32-bit Eric Biggers
2024-10-17 0:00 ` [PATCH v2 02/10] crypto: x86/aegis128 - remove no-op init and exit functions Eric Biggers
2024-10-17 0:00 ` Eric Biggers [this message]
2024-10-17 0:00 ` [PATCH v2 04/10] crypto: x86/aegis128 - don't bother with special code for aligned data Eric Biggers
2024-10-17 0:00 ` [PATCH v2 05/10] crypto: x86/aegis128 - optimize length block preparation using SSE4.1 Eric Biggers
2024-10-17 0:00 ` [PATCH v2 06/10] crypto: x86/aegis128 - improve assembly function prototypes Eric Biggers
2024-10-17 0:00 ` [PATCH v2 07/10] crypto: x86/aegis128 - optimize partial block handling using SSE4.1 Eric Biggers
2024-10-17 0:00 ` [PATCH v2 08/10] crypto: x86/aegis128 - take advantage of block-aligned len Eric Biggers
2024-10-17 0:00 ` [PATCH v2 09/10] crypto: x86/aegis128 - remove unneeded FRAME_BEGIN and FRAME_END Eric Biggers
2024-10-17 0:00 ` [PATCH v2 10/10] crypto: x86/aegis128 - remove unneeded RETs Eric Biggers
2024-10-26 6:59 ` [PATCH v2 00/10] AEGIS x86 assembly tuning Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241017000051.228294-4-ebiggers@kernel.org \
--to=ebiggers@kernel.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=omosnace@redhat.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox