From: Aurelien Jarno <aurelien@aurel32.net>
To: qemu-devel@nongnu.org
Cc: Aurelien Jarno <aurelien@aurel32.net>
Subject: [Qemu-devel] [PATCH 3/3] target-i386: add AES-NI instructions
Date: Thu, 28 Mar 2013 01:00:11 +0100 [thread overview]
Message-ID: <1364428811-8226-4-git-send-email-aurelien@aurel32.net> (raw)
In-Reply-To: <1364428811-8226-1-git-send-email-aurelien@aurel32.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-i386/cpu.c | 6 +-
target-i386/ops_sse.h | 188 ++++++++++++++++++++++++++++++++++++++++++
target-i386/ops_sse_header.h | 6 ++
target-i386/translate.c | 7 ++
4 files changed, 204 insertions(+), 3 deletions(-)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5941d40..321d945 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -390,13 +390,13 @@ typedef struct x86_def_t {
#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
- CPUID_EXT_MOVBE | CPUID_EXT_HYPERVISOR)
+ CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
/* missing:
CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
- CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES,
- CPUID_EXT_XSAVE, CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
+ CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
+ CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
CPUID_EXT_RDRAND */
#define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
index 2ee5b8d..05b9842 100644
--- a/target-i386/ops_sse.h
+++ b/target-i386/ops_sse.h
@@ -2203,6 +2203,194 @@ void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
d->Q(0) = resl;
d->Q(1) = resh;
}
+
+/* AES-NI op helpers */
+/*
+ * AES forward substitution box: 256-entry byte table indexed by the input
+ * byte.  Passed to aes_subreg() by the AESENC/AESENCLAST helpers and
+ * indexed directly by the AESKEYGENASSIST helper.
+ */
+static const uint8_t aes_sbox[256] = {
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
+ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
+ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
+ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
+ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
+ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
+ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
+ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
+ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
+ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
+ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+};
+
+/*
+ * AES inverse substitution box: 256-entry byte table indexed by the input
+ * byte.  Passed to aes_subreg() by the AESDEC/AESDECLAST helpers.
+ */
+static const uint8_t aes_inv_sbox[256] = {
+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
+ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
+ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
+ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
+ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
+ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
+ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
+ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
+ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
+ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
+ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
+ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
+ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
+ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
+ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
+ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
+ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+};
+
+/*
+ * Replace each of the 16 bytes of *d, in place, by its image in the
+ * 256-entry lookup table s (the byte's current value is the index).
+ */
+static inline void aes_subreg(Reg *d, const uint8_t s[256])
+{
+    int pos = 16;
+
+    /* Bytes are substituted independently, so the walk order is free. */
+    while (pos-- > 0) {
+        uint8_t in = d->B(pos);
+
+        d->B(pos) = s[in];
+    }
+}
+
+/*
+ * Byte permutation used for encryption rounds: aes_shiftrows() sets
+ * output byte i to input byte aes_shifts[i].  Kept file-local (static),
+ * like the S-box tables, because this definition lives in a header.
+ */
+static const uint8_t aes_shifts[16] = {
+    0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+};
+
+/*
+ * Byte permutation used for decryption rounds: aes_shiftrows() sets
+ * output byte i to input byte aes_inv_shifts[i].  Kept file-local
+ * (static), like the S-box tables, because this definition lives in a
+ * header.
+ */
+static const uint8_t aes_inv_shifts[16] = {
+    0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+};
+
+/*
+ * Permute the 16 bytes of *a in place: output byte i becomes the original
+ * byte s[i].  A copy of the input is taken first so that bytes already
+ * written do not disturb later reads.
+ */
+static inline void aes_shiftrows(Reg *a, const uint8_t s[16])
+{
+    Reg saved = *a;
+    int dst;
+
+    for (dst = 15; dst >= 0; dst--) {
+        a->B(dst) = saved.B(s[dst]);
+    }
+}
+
+/*
+ * Coefficients used by aes_mixcolumns() for encryption rounds; the
+ * function rotates them by the output row index.  Kept file-local
+ * (static), like the S-box tables, because this definition lives in a
+ * header.
+ */
+static const uint8_t aes_mix[4] = {
+    2, 3, 1, 1
+};
+
+/*
+ * Coefficients used by aes_mixcolumns() for decryption rounds and for
+ * AESIMC; the function rotates them by the output row index.  Kept
+ * file-local (static), like the S-box tables, because this definition
+ * lives in a header.
+ */
+static const uint8_t aes_inv_mix[4] = {
+    14, 11, 13, 9
+};
+
+/*
+ * Multiply a by b as polynomials over GF(2), reducing with 0x1b whenever
+ * a doubling step shifts the top bit out (shift-and-add over the bits of
+ * b, least significant first).  Returns the 8-bit product.
+ */
+static inline uint8_t aes_ffmul(uint8_t a, uint8_t b)
+{
+    uint8_t product;
+
+    for (product = 0; b != 0; b >>= 1) {
+        uint8_t carry = a & 0x80;
+
+        if (b & 1) {
+            product ^= a;
+        }
+        /* Double a; fold the bit shifted out back in via 0x1b. */
+        a <<= 1;
+        if (carry) {
+            a ^= 0x1b;
+        }
+    }
+    return product;
+}
+
+/*
+ * Transform *a in place, one 4-byte column at a time: output byte `row`
+ * of a column is the XOR over k of aes_ffmul(m[(k - row) & 3], input
+ * byte k of that column).  The input is copied first so every product
+ * reads the original bytes.
+ */
+static inline void aes_mixcolumns(Reg *a, const uint8_t m[4])
+{
+    Reg in = *a;
+    int col, row, k;
+
+    /* col is the byte offset of the current column: 0, 4, 8, 12. */
+    for (col = 0; col < 16; col += 4) {
+        for (row = 0; row < 4; row++) {
+            uint8_t acc = 0;
+
+            for (k = 0; k < 4; k++) {
+                acc ^= aes_ffmul(m[(k - row) & 3], in.B(col + k));
+            }
+            a->B(col + row) = acc;
+        }
+    }
+}
+
+/*
+ * AESDEC helper: run one decryption round on the state in *d, in place,
+ * using *s as the round key: inverse row shift, inverse byte
+ * substitution, inverse column mix, then XOR with the round key.
+ * env is unused.
+ */
+void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /*
+     * Snapshot the round key before touching *d: d and s may point to the
+     * same register, in which case the in-place transforms below would
+     * overwrite the key and the final XOR would always yield zero.
+     */
+    Reg rk = *s;
+
+    aes_shiftrows(d, aes_inv_shifts);
+    aes_subreg(d, aes_inv_sbox);
+    aes_mixcolumns(d, aes_inv_mix);
+    d->Q(0) ^= rk.Q(0);
+    d->Q(1) ^= rk.Q(1);
+}
+
+/*
+ * AESDECLAST helper: run the final decryption round on the state in *d,
+ * in place, using *s as the round key: inverse row shift, inverse byte
+ * substitution, then XOR with the round key (no column mix).
+ * env is unused.
+ */
+void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /*
+     * Snapshot the round key before touching *d: d and s may point to the
+     * same register, in which case the in-place transforms below would
+     * overwrite the key and the final XOR would always yield zero.
+     */
+    Reg rk = *s;
+
+    aes_shiftrows(d, aes_inv_shifts);
+    aes_subreg(d, aes_inv_sbox);
+    d->Q(0) ^= rk.Q(0);
+    d->Q(1) ^= rk.Q(1);
+}
+
+/*
+ * AESENC helper: run one encryption round on the state in *d, in place,
+ * using *s as the round key: row shift, byte substitution, column mix,
+ * then XOR with the round key.  env is unused.
+ */
+void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /*
+     * Snapshot the round key before touching *d: d and s may point to the
+     * same register, in which case the in-place transforms below would
+     * overwrite the key and the final XOR would always yield zero.
+     */
+    Reg rk = *s;
+
+    aes_shiftrows(d, aes_shifts);
+    aes_subreg(d, aes_sbox);
+    aes_mixcolumns(d, aes_mix);
+    d->Q(0) ^= rk.Q(0);
+    d->Q(1) ^= rk.Q(1);
+}
+
+/*
+ * AESENCLAST helper: run the final encryption round on the state in *d,
+ * in place, using *s as the round key: row shift, byte substitution,
+ * then XOR with the round key (no column mix).  env is unused.
+ */
+void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /*
+     * Snapshot the round key before touching *d: d and s may point to the
+     * same register, in which case the in-place transforms below would
+     * overwrite the key and the final XOR would always yield zero.
+     */
+    Reg rk = *s;
+
+    aes_shiftrows(d, aes_shifts);
+    aes_subreg(d, aes_sbox);
+    d->Q(0) ^= rk.Q(0);
+    d->Q(1) ^= rk.Q(1);
+}
+
+/*
+ * AESIMC helper: store in *d the result of applying aes_mixcolumns() with
+ * the aes_inv_mix coefficients to *s.  env is unused.
+ */
+void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    Reg state = *s;
+
+    aes_mixcolumns(&state, aes_inv_mix);
+    *d = state;
+}
+
+/*
+ * AESKEYGENASSIST helper.  Dword 0 of *d receives the S-box image of
+ * dword 1 of *s, and dword 2 of *d the S-box image of dword 3 of *s.
+ * Dwords 1 and 3 of *d are those two results rotated right by 8 bits and
+ * XORed with ctrl.  env is unused.
+ */
+void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                          uint32_t ctrl)
+{
+    int n = 0;
+    uint32_t w;
+
+    /* Reads touch bytes 4-7 and 12-15 of *s; writes touch 0-3 and 8-11. */
+    while (n < 4) {
+        d->B(n) = aes_sbox[s->B(n + 4)];
+        d->B(n + 8) = aes_sbox[s->B(n + 12)];
+        n++;
+    }
+
+    w = d->L(0);
+    d->L(1) = (w << 24 | w >> 8) ^ ctrl;
+    w = d->L(2);
+    d->L(3) = (w << 24 | w >> 8) ^ ctrl;
+}
#endif
#undef SHIFT
diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h
index 2842233..a68c7cc 100644
--- a/target-i386/ops_sse_header.h
+++ b/target-i386/ops_sse_header.h
@@ -338,6 +338,12 @@ DEF_HELPER_3(popcnt, tl, env, tl, i32)
/* AES-NI op helpers */
#if SHIFT == 1
+/* AES-NI helpers take (env, Reg, Reg); aeskeygenassist adds an i32 argument */
+DEF_HELPER_3(glue(aesdec, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesdeclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32)
DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32)
#endif
diff --git a/target-i386/translate.c b/target-i386/translate.c
index d649e99..233f24f 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -3149,6 +3149,7 @@ struct SSEOpHelper_eppi {
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
CPUID_EXT_PCLMULQDQ }
+/* AES-NI table entry: first helper slot NULL, second slot the _xmm helper,
+ * tagged with CPUID_EXT_AES. */
+#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
static const struct SSEOpHelper_epp sse_op_table6[256] = {
[0x00] = SSSE3_OP(pshufb),
@@ -3197,6 +3198,11 @@ static const struct SSEOpHelper_epp sse_op_table6[256] = {
[0x3f] = SSE41_OP(pmaxud),
[0x40] = SSE41_OP(pmulld),
[0x41] = SSE41_OP(phminposuw),
+ [0xdb] = AESNI_OP(aesimc),
+ [0xdc] = AESNI_OP(aesenc),
+ [0xdd] = AESNI_OP(aesenclast),
+ [0xde] = AESNI_OP(aesdec),
+ [0xdf] = AESNI_OP(aesdeclast),
};
static const struct SSEOpHelper_eppi sse_op_table7[256] = {
@@ -3223,6 +3229,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = {
[0x61] = SSE42_OP(pcmpestri),
[0x62] = SSE42_OP(pcmpistrm),
[0x63] = SSE42_OP(pcmpistri),
+ [0xdf] = AESNI_OP(aeskeygenassist),
};
static void gen_sse(CPUX86State *env, DisasContext *s, int b,
--
1.7.10.4
next prev parent reply other threads:[~2013-03-28 0:00 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-03-28 0:00 [Qemu-devel] [PATCH 0/3] target-i386: add PCLMULQDQ and AES-NI instructions Aurelien Jarno
2013-03-28 0:00 ` [Qemu-devel] [PATCH 1/3] target-i386: add pclmulqdq instruction Aurelien Jarno
2013-03-28 15:45 ` Richard Henderson
2013-03-28 0:00 ` [Qemu-devel] [PATCH 2/3] target-i386: enable PCLMULQDQ on Westmere CPU Aurelien Jarno
2013-03-28 15:47 ` Richard Henderson
2013-03-28 0:00 ` Aurelien Jarno [this message]
2013-03-28 15:49 ` [Qemu-devel] [PATCH 3/3] target-i386: add AES-NI instructions Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1364428811-8226-4-git-send-email-aurelien@aurel32.net \
--to=aurelien@aurel32.net \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.