qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: qemu-arm@nongnu.org
Cc: qemu-devel@nongnu.org, "Ard Biesheuvel" <ardb@kernel.org>,
	"Peter Maydell" <peter.maydell@linaro.org>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Richard Henderson" <richard.henderson@linaro.org>,
	"Philippe Mathieu-Daudé" <f4bug@amsat.org>
Subject: [PATCH v2 2/2] target/i386: Implement AES instructions using AArch64 counterparts
Date: Wed, 31 May 2023 13:22:39 +0200	[thread overview]
Message-ID: <20230531112239.3164777-3-ardb@kernel.org> (raw)
In-Reply-To: <20230531112239.3164777-1-ardb@kernel.org>

When available, use the AArch64 AES instructions to implement the x86
ones. These are not a 1:1 fit, but they are considerably more efficient
and free of data-dependent timing.

For a typical benchmark (Linux tcrypt mode=500), this gives a 2-3x
speedup when running on ThunderX2.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 host/include/aarch64/host/cpuinfo.h |  1 +
 target/i386/ops_sse.h               | 69 ++++++++++++++++++++
 util/cpuinfo-aarch64.c              |  1 +
 3 files changed, 71 insertions(+)

diff --git a/host/include/aarch64/host/cpuinfo.h b/host/include/aarch64/host/cpuinfo.h
index 82227890b4b4db03..05feeb4f4369fc19 100644
--- a/host/include/aarch64/host/cpuinfo.h
+++ b/host/include/aarch64/host/cpuinfo.h
@@ -9,6 +9,7 @@
 #define CPUINFO_ALWAYS          (1u << 0)  /* so cpuinfo is nonzero */
 #define CPUINFO_LSE             (1u << 1)
 #define CPUINFO_LSE2            (1u << 2)
+#define CPUINFO_AES             (1u << 3)
 
 /* Initialized with a constructor. */
 extern unsigned cpuinfo;
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index fb63af7afa21588d..db79132778efd211 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -20,6 +20,11 @@
 
 #include "crypto/aes.h"
 
+#ifdef __aarch64__
+#include "host/cpuinfo.h"
+typedef uint8_t aes_vec_t __attribute__((vector_size(16)));
+#endif
+
 #if SHIFT == 0
 #define Reg MMXReg
 #define XMM_ONLY(...)
@@ -2165,6 +2170,20 @@ void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s)
     Reg st = *v;
     Reg rk = *s;
 
+#ifdef __aarch64__
+    if (cpuinfo & CPUINFO_AES) {
+        asm("   .arch_extension aes             \n"
+            "   aesd    %0.16b, %1.16b          \n"
+            "   aesimc  %0.16b, %0.16b          \n"
+            "   eor     %0.16b, %0.16b, %2.16b  \n"
+            :   "=w"(*(aes_vec_t *)d)
+            :   "w"((aes_vec_t){}),
+                "w"(*(aes_vec_t *)s),
+                "0"(*(aes_vec_t *)v));
+        return;
+    }
+#endif
+
     for (i = 0 ; i < 2 << SHIFT ; i++) {
         int j = i & 3;
         d->L(i) = rk.L(i) ^ bswap32(AES_Td0[st.B(AES_ishifts[4 * j + 0])] ^
@@ -2180,6 +2199,19 @@ void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s)
     Reg st = *v;
     Reg rk = *s;
 
+#ifdef __aarch64__
+    if (cpuinfo & CPUINFO_AES) {
+        asm("   .arch_extension aes             \n"
+            "   aesd    %0.16b, %1.16b          \n"
+            "   eor     %0.16b, %0.16b, %2.16b  \n"
+            :   "=w"(*(aes_vec_t *)d)
+            :   "w"((aes_vec_t){}),
+                "w"(*(aes_vec_t *)s),
+                "0"(*(aes_vec_t *)v));
+        return;
+    }
+#endif
+
     for (i = 0; i < 8 << SHIFT; i++) {
         d->B(i) = rk.B(i) ^ (AES_isbox[st.B(AES_ishifts[i & 15] + (i & ~15))]);
     }
@@ -2191,6 +2223,20 @@ void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s)
     Reg st = *v;
     Reg rk = *s;
 
+#ifdef __aarch64__
+    if (cpuinfo & CPUINFO_AES) {
+        asm("   .arch_extension aes             \n"
+            "   aese    %0.16b, %1.16b          \n"
+            "   aesmc   %0.16b, %0.16b          \n"
+            "   eor     %0.16b, %0.16b, %2.16b  \n"
+            :   "=w"(*(aes_vec_t *)d)
+            :   "w"((aes_vec_t){}),
+                "w"(*(aes_vec_t *)s),
+                "0"(*(aes_vec_t *)v));
+        return;
+    }
+#endif
+
     for (i = 0 ; i < 2 << SHIFT ; i++) {
         int j = i & 3;
         d->L(i) = rk.L(i) ^ bswap32(AES_Te0[st.B(AES_shifts[4 * j + 0])] ^
@@ -2206,6 +2252,19 @@ void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s)
     Reg st = *v;
     Reg rk = *s;
 
+#ifdef __aarch64__
+    if (cpuinfo & CPUINFO_AES) {
+        asm("   .arch_extension aes             \n"
+            "   aese    %0.16b, %1.16b          \n"
+            "   eor     %0.16b, %0.16b, %2.16b  \n"
+            :   "=w"(*(aes_vec_t *)d)
+            :   "w"((aes_vec_t){}),
+                "w"(*(aes_vec_t *)s),
+                "0"(*(aes_vec_t *)v));
+        return;
+    }
+#endif
+
     for (i = 0; i < 8 << SHIFT; i++) {
         d->B(i) = rk.B(i) ^ (AES_sbox[st.B(AES_shifts[i & 15] + (i & ~15))]);
     }
@@ -2217,6 +2276,16 @@ void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     int i;
     Reg tmp = *s;
 
+#ifdef __aarch64__
+    if (cpuinfo & CPUINFO_AES) {
+        asm("   .arch_extension aes             \n"
+            "   aesimc  %0.16b, %1.16b          \n"
+            :   "=w"(*(aes_vec_t *)d)
+            :   "w"(*(aes_vec_t *)s));
+        return;
+    }
+#endif
+
     for (i = 0 ; i < 4 ; i++) {
         d->L(i) = bswap32(AES_imc[tmp.B(4 * i + 0)][0] ^
                           AES_imc[tmp.B(4 * i + 1)][1] ^
diff --git a/util/cpuinfo-aarch64.c b/util/cpuinfo-aarch64.c
index f99acb788454e5ab..769cdfeb2fc32d5e 100644
--- a/util/cpuinfo-aarch64.c
+++ b/util/cpuinfo-aarch64.c
@@ -56,6 +56,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
     unsigned long hwcap = qemu_getauxval(AT_HWCAP);
     info |= (hwcap & HWCAP_ATOMICS ? CPUINFO_LSE : 0);
     info |= (hwcap & HWCAP_USCAT ? CPUINFO_LSE2 : 0);
+    info |= (hwcap & HWCAP_AES ? CPUINFO_AES : 0);
 #endif
 #ifdef CONFIG_DARWIN
     info |= sysctl_for_bool("hw.optional.arm.FEAT_LSE") * CPUINFO_LSE;
-- 
2.39.2



  parent reply	other threads:[~2023-05-31 11:23 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-31 11:22 [PATCH v2 0/2] Implement AES on ARM using x86 instructions and vv Ard Biesheuvel
2023-05-31 11:22 ` [PATCH v2 1/2] target/arm: use x86 intrinsics to implement AES instructions Ard Biesheuvel
2023-05-31 11:22 ` Ard Biesheuvel [this message]
2023-05-31 17:13   ` [PATCH v2 2/2] target/i386: Implement AES instructions using AArch64 counterparts Richard Henderson
2023-05-31 16:33 ` [PATCH v2 0/2] Implement AES on ARM using x86 instructions and vv Richard Henderson
2023-05-31 16:47   ` Ard Biesheuvel
2023-05-31 17:08     ` Richard Henderson
2023-06-01  4:08       ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230531112239.3164777-3-ardb@kernel.org \
    --to=ardb@kernel.org \
    --cc=alex.bennee@linaro.org \
    --cc=f4bug@amsat.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-arm@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).