All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aurelien Jarno <aurelien@aurel32.net>
To: qemu-devel@nongnu.org
Cc: Aurelien Jarno <aurelien@aurel32.net>
Subject: [Qemu-devel] [PATCH 3/3] target-i386: add AES-NI instructions
Date: Thu, 28 Mar 2013 01:00:11 +0100	[thread overview]
Message-ID: <1364428811-8226-4-git-send-email-aurelien@aurel32.net> (raw)
In-Reply-To: <1364428811-8226-1-git-send-email-aurelien@aurel32.net>

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target-i386/cpu.c            |    6 +-
 target-i386/ops_sse.h        |  188 ++++++++++++++++++++++++++++++++++++++++++
 target-i386/ops_sse_header.h |    6 ++
 target-i386/translate.c      |    7 ++
 4 files changed, 204 insertions(+), 3 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5941d40..321d945 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -390,13 +390,13 @@ typedef struct x86_def_t {
 #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
           CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
           CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
-          CPUID_EXT_MOVBE | CPUID_EXT_HYPERVISOR)
+          CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
           /* missing:
           CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
           CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
           CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
-          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES,
-          CPUID_EXT_XSAVE, CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
+          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
+          CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
           CPUID_EXT_RDRAND */
 #define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
           CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
index 2ee5b8d..05b9842 100644
--- a/target-i386/ops_sse.h
+++ b/target-i386/ops_sse.h
@@ -2203,6 +2203,194 @@ void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
     d->Q(0) = resl;
     d->Q(1) = resh;
 }
+
+/* AES-NI op helpers */
+static const uint8_t aes_sbox[256] = {
+    0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
+    0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+    0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+    0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+    0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
+    0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+    0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
+    0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+    0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+    0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+    0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
+    0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+    0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+    0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+    0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+    0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+    0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
+    0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+    0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
+    0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+    0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+    0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+    0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
+    0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+    0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
+    0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+    0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+    0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+    0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
+    0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+    0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
+    0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+};
+
+static const uint8_t aes_inv_sbox[256] = {
+    0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
+    0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+    0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
+    0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+    0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
+    0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+    0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
+    0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+    0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
+    0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+    0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
+    0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+    0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
+    0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+    0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
+    0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+    0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
+    0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+    0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
+    0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+    0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
+    0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+    0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
+    0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+    0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
+    0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+    0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
+    0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+    0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
+    0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+    0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
+    0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+};
+
+static inline void aes_subreg(Reg *d, const uint8_t s[256])
+{
+    int i;
+
+    for (i = 0; i < 16; i++) {
+        d->B(i) = s[d->B(i)];
+    }
+}
+
+const uint8_t aes_shifts[16] = {
+    0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+};
+
+const uint8_t aes_inv_shifts[16] = {
+    0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+};
+
+static inline void aes_shiftrows(Reg *a, const uint8_t s[16])
+{
+    int i;
+    Reg tmp = *a;
+
+    for (i = 0 ; i < 16 ; i++) {
+        a->B(i) = tmp.B(s[i]);
+    }
+}
+
+const uint8_t aes_mix[4] = {
+    2, 3, 1, 1
+};
+
+const uint8_t aes_inv_mix[4] = {
+    14, 11, 13, 9
+};
+
+static inline uint8_t aes_ffmul(uint8_t a, uint8_t b)
+{
+    uint8_t res = 0;
+
+    while (b) {
+        if (b & 1) {
+            res ^= a;
+        }
+        a = (a << 1) ^ (a & 0x80 ? 0x1b : 0x00);
+        b >>= 1;
+    }
+    return res;
+}
+
+static inline void aes_mixcolumns(Reg *a, const uint8_t m[4])
+{
+    int i, j, k;
+    Reg tmp = *a;
+
+    for (i = 0; i < 4; i += 1) {
+        for (j = 0; j < 4; j += 1) {
+            a->B(i * 4 + j) = 0;
+            for (k = 0; k < 4; k += 1) {
+                a->B(i * 4 + j) ^= aes_ffmul(m[(k - j) & 3], tmp.B(i * 4 + k));
+            }
+        }
+    }
+}
+
+void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    aes_shiftrows(d, aes_inv_shifts);
+    aes_subreg(d, aes_inv_sbox);
+    aes_mixcolumns(d, aes_inv_mix);
+    d->Q(0) ^= s->Q(0);
+    d->Q(1) ^= s->Q(1);
+}
+
+void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    aes_shiftrows(d, aes_inv_shifts);
+    aes_subreg(d, aes_inv_sbox);
+    d->Q(0) ^= s->Q(0);
+    d->Q(1) ^= s->Q(1);
+}
+
+void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    aes_shiftrows(d, aes_shifts);
+    aes_subreg(d, aes_sbox);
+    aes_mixcolumns(d, aes_mix);
+    d->Q(0) ^= s->Q(0);
+    d->Q(1) ^= s->Q(1);
+}
+
+void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    aes_shiftrows(d, aes_shifts);
+    aes_subreg(d, aes_sbox);
+    d->Q(0) ^= s->Q(0);
+    d->Q(1) ^= s->Q(1);
+}
+
+void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    *d = *s;
+    aes_mixcolumns(d, aes_inv_mix);
+}
+
+void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                          uint32_t ctrl)
+{
+    int i;
+
+    for (i = 0 ; i < 4 ; i++) {
+        d->B(i) = aes_sbox[s->B(i + 4)];
+        d->B(i + 8) = aes_sbox[s->B(i + 12)];
+    }
+    d->L(1) = (d->L(0) << 24 | d->L(0) >> 8) ^ ctrl;
+    d->L(3) = (d->L(2) << 24 | d->L(2) >> 8) ^ ctrl;
+}
 #endif
 
 #undef SHIFT
diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h
index 2842233..a68c7cc 100644
--- a/target-i386/ops_sse_header.h
+++ b/target-i386/ops_sse_header.h
@@ -338,6 +338,12 @@ DEF_HELPER_3(popcnt, tl, env, tl, i32)
 
 /* AES-NI op helpers */
 #if SHIFT == 1
+DEF_HELPER_3(glue(aesdec, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesdeclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32)
 DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32)
 #endif
 
diff --git a/target-i386/translate.c b/target-i386/translate.c
index d649e99..233f24f 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -3149,6 +3149,7 @@ struct SSEOpHelper_eppi {
 #define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
 #define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
         CPUID_EXT_PCLMULQDQ }
+#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
 
 static const struct SSEOpHelper_epp sse_op_table6[256] = {
     [0x00] = SSSE3_OP(pshufb),
@@ -3197,6 +3198,11 @@ static const struct SSEOpHelper_epp sse_op_table6[256] = {
     [0x3f] = SSE41_OP(pmaxud),
     [0x40] = SSE41_OP(pmulld),
     [0x41] = SSE41_OP(phminposuw),
+    [0xdb] = AESNI_OP(aesimc),
+    [0xdc] = AESNI_OP(aesenc),
+    [0xdd] = AESNI_OP(aesenclast),
+    [0xde] = AESNI_OP(aesdec),
+    [0xdf] = AESNI_OP(aesdeclast),
 };
 
 static const struct SSEOpHelper_eppi sse_op_table7[256] = {
@@ -3223,6 +3229,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = {
     [0x61] = SSE42_OP(pcmpestri),
     [0x62] = SSE42_OP(pcmpistrm),
     [0x63] = SSE42_OP(pcmpistri),
+    [0xdf] = AESNI_OP(aeskeygenassist),
 };
 
 static void gen_sse(CPUX86State *env, DisasContext *s, int b,
-- 
1.7.10.4

  parent reply	other threads:[~2013-03-28  0:00 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-03-28  0:00 [Qemu-devel] [PATCH 0/3] target-i386: add PCLMULQDQ and AES-NI instructions Aurelien Jarno
2013-03-28  0:00 ` [Qemu-devel] [PATCH 1/3] target-i386: add pclmulqdq instruction Aurelien Jarno
2013-03-28 15:45   ` Richard Henderson
2013-03-28  0:00 ` [Qemu-devel] [PATCH 2/3] target-i386: enable PCLMULQDQ on Westmere CPU Aurelien Jarno
2013-03-28 15:47   ` Richard Henderson
2013-03-28  0:00 ` Aurelien Jarno [this message]
2013-03-28 15:49   ` [Qemu-devel] [PATCH 3/3] target-i386: add AES-NI instructions Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1364428811-8226-4-git-send-email-aurelien@aurel32.net \
    --to=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.