* [Qemu-devel] [PATCH 0/3] target-i386: add PCLMULQDQ and AES-NI instructions
@ 2013-03-28 0:00 Aurelien Jarno
2013-03-28 0:00 ` [Qemu-devel] [PATCH 1/3] target-i386: add pclmulqdq instruction Aurelien Jarno
` (2 more replies)
0 siblings, 3 replies; 7+ messages in thread
From: Aurelien Jarno @ 2013-03-28 0:00 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
This patch series adds the PCLMULQDQ and AES-NI instructions to the x86
emulation. Along with the SSE4.1 and SSE4.2 series, this brings the
instruction emulation to the level of a Westmere CPU.
It has been tested with the valgrind testsuite and with the kernel
autotest.
Aurelien Jarno (3):
target-i386: add pclmulqdq instruction
target-i386: enable PCLMULQDQ on Westmere CPU
target-i386: add AES-NI instructions
target-i386/cpu.c | 19 ++--
target-i386/ops_sse.h | 212 ++++++++++++++++++++++++++++++++++++++++++
target-i386/ops_sse_header.h | 11 +++
target-i386/translate.c | 10 ++
4 files changed, 242 insertions(+), 10 deletions(-)
--
1.7.10.4
^ permalink raw reply [flat|nested] 7+ messages in thread
* [Qemu-devel] [PATCH 1/3] target-i386: add pclmulqdq instruction
2013-03-28 0:00 [Qemu-devel] [PATCH 0/3] target-i386: add PCLMULQDQ and AES-NI instructions Aurelien Jarno
@ 2013-03-28 0:00 ` Aurelien Jarno
2013-03-28 15:45 ` Richard Henderson
2013-03-28 0:00 ` [Qemu-devel] [PATCH 2/3] target-i386: enable PCLMULQDQ on Westmere CPU Aurelien Jarno
2013-03-28 0:00 ` [Qemu-devel] [PATCH 3/3] target-i386: add AES-NI instructions Aurelien Jarno
2 siblings, 1 reply; 7+ messages in thread
From: Aurelien Jarno @ 2013-03-28 0:00 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-i386/cpu.c | 19 +++++++++----------
target-i386/ops_sse.h | 24 ++++++++++++++++++++++++
target-i386/ops_sse_header.h | 5 +++++
target-i386/translate.c | 3 +++
4 files changed, 41 insertions(+), 10 deletions(-)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 4b43759..41382c5 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -387,17 +387,16 @@ typedef struct x86_def_t {
CPUID_PSE36 (needed for Solaris) */
/* missing:
CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */
-#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | \
- CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_SSE41 | \
- CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | CPUID_EXT_MOVBE | \
- CPUID_EXT_HYPERVISOR)
+#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
+ CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
+ CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
+ CPUID_EXT_MOVBE | CPUID_EXT_HYPERVISOR)
/* missing:
- CPUID_EXT_PCLMULQDQ, CPUID_EXT_DTES64, CPUID_EXT_DSCPL,
- CPUID_EXT_VMX, CPUID_EXT_SMX, CPUID_EXT_EST, CPUID_EXT_TM2,
- CPUID_EXT_CID, CPUID_EXT_FMA, CPUID_EXT_XTPR, CPUID_EXT_PDCM,
- CPUID_EXT_PCID, CPUID_EXT_DCA, CPUID_EXT_X2APIC,
- CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES, CPUID_EXT_XSAVE,
- CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
+ CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
+ CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
+ CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
+ CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES,
+ CPUID_EXT_XSAVE, CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
CPUID_EXT_RDRAND */
#define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
index a11dba1..2ee5b8d 100644
--- a/target-i386/ops_sse.h
+++ b/target-i386/ops_sse.h
@@ -2179,6 +2179,30 @@ target_ulong helper_popcnt(CPUX86State *env, target_ulong n, uint32_t type)
return POPCOUNT(n, 5);
#endif
}
+
+/*
+ * PCLMULQDQ: carry-less (XOR-accumulating) multiplication of two 64-bit
+ * quadwords.  Bit 0 of ctrl selects the quadword taken from d, bit 4 of
+ * ctrl selects the quadword taken from s.  The 128-bit product replaces d.
+ */
+void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                    uint32_t ctrl)
+{
+    uint64_t multiplicand = d->Q((ctrl & 1) != 0);
+    uint64_t multiplier = s->Q((ctrl & 16) != 0);
+    uint64_t prod_lo = 0;
+    uint64_t prod_hi = 0;
+    int bit;
+
+    for (bit = 0; bit < 64; bit++) {
+        if (!((multiplier >> bit) & 1)) {
+            continue;
+        }
+        /* XOR in (multiplicand << bit), split over the two 64-bit halves. */
+        prod_lo ^= multiplicand << bit;
+        if (bit != 0) {
+            /* bit == 0 has no spill-over; a 64-bit shift would be undefined. */
+            prod_hi ^= multiplicand >> (64 - bit);
+        }
+    }
+
+    d->Q(0) = prod_lo;
+    d->Q(1) = prod_hi;
+}
#endif
#undef SHIFT
diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h
index 401eac6..2842233 100644
--- a/target-i386/ops_sse_header.h
+++ b/target-i386/ops_sse_header.h
@@ -336,6 +336,11 @@ DEF_HELPER_3(crc32, tl, i32, tl, i32)
DEF_HELPER_3(popcnt, tl, env, tl, i32)
#endif
+/* AES-NI op helpers */
+#if SHIFT == 1
+DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32)
+#endif
+
#undef SHIFT
#undef Reg
#undef SUFFIX
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 7596a90..d649e99 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -3147,6 +3147,8 @@ struct SSEOpHelper_eppi {
#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
+#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
+ CPUID_EXT_PCLMULQDQ }
static const struct SSEOpHelper_epp sse_op_table6[256] = {
[0x00] = SSSE3_OP(pshufb),
@@ -3216,6 +3218,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = {
[0x40] = SSE41_OP(dpps),
[0x41] = SSE41_OP(dppd),
[0x42] = SSE41_OP(mpsadbw),
+ [0x44] = PCLMULQDQ_OP(pclmulqdq),
[0x60] = SSE42_OP(pcmpestrm),
[0x61] = SSE42_OP(pcmpestri),
[0x62] = SSE42_OP(pcmpistrm),
--
1.7.10.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Qemu-devel] [PATCH 2/3] target-i386: enable PCLMULQDQ on Westmere CPU
2013-03-28 0:00 [Qemu-devel] [PATCH 0/3] target-i386: add PCLMULQDQ and AES-NI instructions Aurelien Jarno
2013-03-28 0:00 ` [Qemu-devel] [PATCH 1/3] target-i386: add pclmulqdq instruction Aurelien Jarno
@ 2013-03-28 0:00 ` Aurelien Jarno
2013-03-28 15:47 ` Richard Henderson
2013-03-28 0:00 ` [Qemu-devel] [PATCH 3/3] target-i386: add AES-NI instructions Aurelien Jarno
2 siblings, 1 reply; 7+ messages in thread
From: Aurelien Jarno @ 2013-03-28 0:00 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
The PCLMULQDQ instruction has been introduced on the Westmere CPU.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-i386/cpu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 41382c5..5941d40 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -687,7 +687,7 @@ static x86_def_t builtin_x86_defs[] = {
CPUID_DE | CPUID_FP87,
.ext_features = CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 |
CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
- CPUID_EXT_SSE3,
+ CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3,
.ext2_features = CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
.ext3_features = CPUID_EXT3_LAHF_LM,
.xlevel = 0x8000000A,
--
1.7.10.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Qemu-devel] [PATCH 3/3] target-i386: add AES-NI instructions
2013-03-28 0:00 [Qemu-devel] [PATCH 0/3] target-i386: add PCLMULQDQ and AES-NI instructions Aurelien Jarno
2013-03-28 0:00 ` [Qemu-devel] [PATCH 1/3] target-i386: add pclmulqdq instruction Aurelien Jarno
2013-03-28 0:00 ` [Qemu-devel] [PATCH 2/3] target-i386: enable PCLMULQDQ on Westmere CPU Aurelien Jarno
@ 2013-03-28 0:00 ` Aurelien Jarno
2013-03-28 15:49 ` Richard Henderson
2 siblings, 1 reply; 7+ messages in thread
From: Aurelien Jarno @ 2013-03-28 0:00 UTC (permalink / raw)
To: qemu-devel; +Cc: Aurelien Jarno
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-i386/cpu.c | 6 +-
target-i386/ops_sse.h | 188 ++++++++++++++++++++++++++++++++++++++++++
target-i386/ops_sse_header.h | 6 ++
target-i386/translate.c | 7 ++
4 files changed, 204 insertions(+), 3 deletions(-)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 5941d40..321d945 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -390,13 +390,13 @@ typedef struct x86_def_t {
#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
- CPUID_EXT_MOVBE | CPUID_EXT_HYPERVISOR)
+ CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
/* missing:
CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
- CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AES,
- CPUID_EXT_XSAVE, CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
+ CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
+ CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
CPUID_EXT_RDRAND */
#define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
index 2ee5b8d..05b9842 100644
--- a/target-i386/ops_sse.h
+++ b/target-i386/ops_sse.h
@@ -2203,6 +2203,194 @@ void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
d->Q(0) = resl;
d->Q(1) = resh;
}
+
+/* AES-NI op helpers */
+/*
+ * AES S-box: 256-entry byte substitution table.  In this file it is handed
+ * to aes_subreg() by the AESENC/AESENCLAST helpers and indexed directly by
+ * the AESKEYGENASSIST helper.
+ */
+static const uint8_t aes_sbox[256] = {
+ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
+ 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+ 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
+ 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
+ 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+ 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
+ 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+ 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+ 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
+ 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
+ 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+ 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
+ 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
+ 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+ 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
+ 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
+ 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+};
+
+/*
+ * AES inverse S-box: 256-entry byte substitution table.  In this file it is
+ * handed to aes_subreg() by the AESDEC/AESDECLAST helpers.
+ */
+static const uint8_t aes_inv_sbox[256] = {
+ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
+ 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
+ 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
+ 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
+ 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
+ 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
+ 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
+ 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
+ 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
+ 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
+ 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
+ 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
+ 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
+ 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
+ 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
+ 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
+ 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+};
+
+/* Replace each of the 16 bytes of *d by its image in the 256-entry table s. */
+static inline void aes_subreg(Reg *d, const uint8_t s[256])
+{
+    int pos;
+
+    /* Bytes are substituted independently, so the visiting order is free. */
+    for (pos = 15; pos >= 0; pos--) {
+        d->B(pos) = s[d->B(pos)];
+    }
+}
+
+/*
+ * Byte permutation used for the encryption direction: aes_shiftrows()
+ * stores source byte aes_shifts[i] into position i.  Only used inside this
+ * file, hence static.
+ */
+static const uint8_t aes_shifts[16] = {
+    0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+};
+
+/*
+ * Byte permutation used for the decryption direction: aes_shiftrows()
+ * stores source byte aes_inv_shifts[i] into position i.  Only used inside
+ * this file, hence static.
+ */
+static const uint8_t aes_inv_shifts[16] = {
+    0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+};
+
+/*
+ * Permute the 16 bytes of *a in place: after the call, byte i holds what
+ * byte s[i] held before the call.
+ */
+static inline void aes_shiftrows(Reg *a, const uint8_t s[16])
+{
+    Reg before = *a;    /* snapshot, since *a is overwritten as we go */
+    int dst;
+
+    for (dst = 0; dst < 16; dst++) {
+        a->B(dst) = before.B(s[dst]);
+    }
+}
+
+/*
+ * Column-mixing coefficients for the encryption direction;
+ * aes_mixcolumns() multiplies input byte k by m[(k - j) & 3] when building
+ * output byte j of a column.  Only used inside this file, hence static.
+ */
+static const uint8_t aes_mix[4] = {
+    2, 3, 1, 1
+};
+
+/*
+ * Column-mixing coefficients for the decryption direction (also used by
+ * the AESIMC helper); aes_mixcolumns() multiplies input byte k by
+ * m[(k - j) & 3] when building output byte j of a column.  Only used
+ * inside this file, hence static.
+ */
+static const uint8_t aes_inv_mix[4] = {
+    14, 11, 13, 9
+};
+
+/*
+ * Shift-and-add multiplication of two bytes: for every set bit of b the
+ * current value of a is XORed into the result, and a is doubled each step
+ * with 0x1b folded in whenever its top bit was set before the shift.
+ */
+static inline uint8_t aes_ffmul(uint8_t a, uint8_t b)
+{
+    uint8_t acc = 0;
+
+    for (; b != 0; b >>= 1) {
+        uint8_t reduce = (a & 0x80) ? 0x1b : 0x00;
+
+        if (b & 1) {
+            acc ^= a;
+        }
+        a = (uint8_t)(a << 1) ^ reduce;
+    }
+    return acc;
+}
+
+/*
+ * Mix each group of four consecutive bytes of *a in place.  Output byte
+ * `row` of a group is the XOR over k of aes_ffmul(m[(k - row) & 3], in[k]),
+ * where in[] is that group's contents before the call.
+ */
+static inline void aes_mixcolumns(Reg *a, const uint8_t m[4])
+{
+    Reg in = *a;    /* all outputs are computed from the original bytes */
+    int col, row, k;
+
+    for (col = 0; col < 4; col++) {
+        for (row = 0; row < 4; row++) {
+            uint8_t acc = 0;
+
+            for (k = 0; k < 4; k++) {
+                acc ^= aes_ffmul(m[(k - row) & 3], in.B(col * 4 + k));
+            }
+            a->B(col * 4 + row) = acc;
+        }
+    }
+}
+
+/*
+ * AESDEC: apply aes_shiftrows() with the inverse permutation, the inverse
+ * S-box and aes_mixcolumns() with the inverse coefficients to the state in
+ * d, then XOR the result with the round key in s.  The result is left in d.
+ */
+void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /*
+     * Take a copy of the round key before touching d: nothing prevents d
+     * and s from being the same register, and the in-place transforms
+     * below would then clobber the key (making the final XOR yield zero).
+     */
+    Reg rk = *s;
+
+    aes_shiftrows(d, aes_inv_shifts);
+    aes_subreg(d, aes_inv_sbox);
+    aes_mixcolumns(d, aes_inv_mix);
+    d->Q(0) ^= rk.Q(0);
+    d->Q(1) ^= rk.Q(1);
+}
+
+/*
+ * AESDECLAST: like AESDEC but without the column-mixing step: inverse
+ * byte permutation and inverse S-box on the state in d, then XOR with the
+ * round key in s.  The result is left in d.
+ */
+void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /*
+     * Take a copy of the round key before touching d: nothing prevents d
+     * and s from being the same register, and the in-place transforms
+     * below would then clobber the key (making the final XOR yield zero).
+     */
+    Reg rk = *s;
+
+    aes_shiftrows(d, aes_inv_shifts);
+    aes_subreg(d, aes_inv_sbox);
+    d->Q(0) ^= rk.Q(0);
+    d->Q(1) ^= rk.Q(1);
+}
+
+/*
+ * AESENC: apply aes_shiftrows(), the S-box and aes_mixcolumns() with the
+ * forward tables to the state in d, then XOR the result with the round key
+ * in s.  The result is left in d.
+ */
+void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /*
+     * Take a copy of the round key before touching d: nothing prevents d
+     * and s from being the same register, and the in-place transforms
+     * below would then clobber the key (making the final XOR yield zero).
+     */
+    Reg rk = *s;
+
+    aes_shiftrows(d, aes_shifts);
+    aes_subreg(d, aes_sbox);
+    aes_mixcolumns(d, aes_mix);
+    d->Q(0) ^= rk.Q(0);
+    d->Q(1) ^= rk.Q(1);
+}
+
+/*
+ * AESENCLAST: like AESENC but without the column-mixing step: forward
+ * byte permutation and S-box on the state in d, then XOR with the round
+ * key in s.  The result is left in d.
+ */
+void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /*
+     * Take a copy of the round key before touching d: nothing prevents d
+     * and s from being the same register, and the in-place transforms
+     * below would then clobber the key (making the final XOR yield zero).
+     */
+    Reg rk = *s;
+
+    aes_shiftrows(d, aes_shifts);
+    aes_subreg(d, aes_sbox);
+    d->Q(0) ^= rk.Q(0);
+    d->Q(1) ^= rk.Q(1);
+}
+
+/*
+ * AESIMC: d receives the contents of s after aes_mixcolumns() with the
+ * inverse coefficients; s itself is only read.
+ */
+void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    Reg mixed = *s;
+
+    aes_mixcolumns(&mixed, aes_inv_mix);
+    *d = mixed;
+}
+
+/*
+ * AESKEYGENASSIST: dwords 0 and 2 of d receive the S-box substitution of
+ * dwords 1 and 3 of s; dwords 1 and 3 of d receive those substituted
+ * values rotated right by 8 bits and XORed with ctrl.
+ */
+void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
+                                          uint32_t ctrl)
+{
+    uint32_t sub;
+    int i;
+
+    /* Reads touch bytes 4-7 and 12-15 only, writes bytes 0-3 and 8-11. */
+    for (i = 0; i < 4; i++) {
+        d->B(i) = aes_sbox[s->B(i + 4)];
+    }
+    for (i = 0; i < 4; i++) {
+        d->B(i + 8) = aes_sbox[s->B(i + 12)];
+    }
+
+    sub = d->L(0);
+    d->L(1) = (sub << 24 | sub >> 8) ^ ctrl;
+    sub = d->L(2);
+    d->L(3) = (sub << 24 | sub >> 8) ^ ctrl;
+}
#endif
#undef SHIFT
diff --git a/target-i386/ops_sse_header.h b/target-i386/ops_sse_header.h
index 2842233..a68c7cc 100644
--- a/target-i386/ops_sse_header.h
+++ b/target-i386/ops_sse_header.h
@@ -338,6 +338,12 @@ DEF_HELPER_3(popcnt, tl, env, tl, i32)
/* AES-NI op helpers */
#if SHIFT == 1
+DEF_HELPER_3(glue(aesdec, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesdeclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesenclast, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32)
DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32)
#endif
diff --git a/target-i386/translate.c b/target-i386/translate.c
index d649e99..233f24f 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -3149,6 +3149,7 @@ struct SSEOpHelper_eppi {
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
CPUID_EXT_PCLMULQDQ }
+#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
static const struct SSEOpHelper_epp sse_op_table6[256] = {
[0x00] = SSSE3_OP(pshufb),
@@ -3197,6 +3198,11 @@ static const struct SSEOpHelper_epp sse_op_table6[256] = {
[0x3f] = SSE41_OP(pmaxud),
[0x40] = SSE41_OP(pmulld),
[0x41] = SSE41_OP(phminposuw),
+ [0xdb] = AESNI_OP(aesimc),
+ [0xdc] = AESNI_OP(aesenc),
+ [0xdd] = AESNI_OP(aesenclast),
+ [0xde] = AESNI_OP(aesdec),
+ [0xdf] = AESNI_OP(aesdeclast),
};
static const struct SSEOpHelper_eppi sse_op_table7[256] = {
@@ -3223,6 +3229,7 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = {
[0x61] = SSE42_OP(pcmpestri),
[0x62] = SSE42_OP(pcmpistrm),
[0x63] = SSE42_OP(pcmpistri),
+ [0xdf] = AESNI_OP(aeskeygenassist),
};
static void gen_sse(CPUX86State *env, DisasContext *s, int b,
--
1.7.10.4
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH 1/3] target-i386: add pclmulqdq instruction
2013-03-28 0:00 ` [Qemu-devel] [PATCH 1/3] target-i386: add pclmulqdq instruction Aurelien Jarno
@ 2013-03-28 15:45 ` Richard Henderson
0 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2013-03-28 15:45 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: qemu-devel
On 03/27/2013 05:00 PM, Aurelien Jarno wrote:
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
> ---
> target-i386/cpu.c | 19 +++++++++----------
> target-i386/ops_sse.h | 24 ++++++++++++++++++++++++
> target-i386/ops_sse_header.h | 5 +++++
> target-i386/translate.c | 3 +++
> 4 files changed, 41 insertions(+), 10 deletions(-)
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH 2/3] target-i386: enable PCLMULQDQ on Westmere CPU
2013-03-28 0:00 ` [Qemu-devel] [PATCH 2/3] target-i386: enable PCLMULQDQ on Westmere CPU Aurelien Jarno
@ 2013-03-28 15:47 ` Richard Henderson
0 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2013-03-28 15:47 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: qemu-devel
On 03/27/2013 05:00 PM, Aurelien Jarno wrote:
> The PCLMULQDQ instruction has been introduced on the Westmere CPU.
>
> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [Qemu-devel] [PATCH 3/3] target-i386: add AES-NI instructions
2013-03-28 0:00 ` [Qemu-devel] [PATCH 3/3] target-i386: add AES-NI instructions Aurelien Jarno
@ 2013-03-28 15:49 ` Richard Henderson
0 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2013-03-28 15:49 UTC (permalink / raw)
To: Aurelien Jarno; +Cc: qemu-devel
On 03/27/2013 05:00 PM, Aurelien Jarno wrote:
> +const uint8_t aes_shifts[16] = {
> +const uint8_t aes_inv_shifts[16] = {
> +const uint8_t aes_mix[4] = {
> +const uint8_t aes_inv_mix[4] = {
static?
r~
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2013-03-28 15:51 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-28 0:00 [Qemu-devel] [PATCH 0/3] target-i386: add PCLMULQDQ and AES-NI instructions Aurelien Jarno
2013-03-28 0:00 ` [Qemu-devel] [PATCH 1/3] target-i386: add pclmulqdq instruction Aurelien Jarno
2013-03-28 15:45 ` Richard Henderson
2013-03-28 0:00 ` [Qemu-devel] [PATCH 2/3] target-i386: enable PCLMULQDQ on Westmere CPU Aurelien Jarno
2013-03-28 15:47 ` Richard Henderson
2013-03-28 0:00 ` [Qemu-devel] [PATCH 3/3] target-i386: add AES-NI instructions Aurelien Jarno
2013-03-28 15:49 ` Richard Henderson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).