From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1JjJqu-00036R-5Q for qemu-devel@nongnu.org; Tue, 08 Apr 2008 15:51:32 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1JjJqt-00034Q-0l for qemu-devel@nongnu.org; Tue, 08 Apr 2008 15:51:31 -0400 Received: from [199.232.76.173] (helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1JjJqs-00034A-S5 for qemu-devel@nongnu.org; Tue, 08 Apr 2008 15:51:30 -0400 Received: from savannah.gnu.org ([199.232.41.3] helo=sv.gnu.org) by monty-python.gnu.org with esmtps (TLS-1.0:RSA_AES_256_CBC_SHA1:32) (Exim 4.60) (envelope-from ) id 1JjJqs-0005go-B6 for qemu-devel@nongnu.org; Tue, 08 Apr 2008 15:51:30 -0400 Received: from cvs.savannah.gnu.org ([199.232.41.69]) by sv.gnu.org with esmtp (Exim 4.63) (envelope-from ) id 1JjJqr-000882-Nt for qemu-devel@nongnu.org; Tue, 08 Apr 2008 19:51:29 +0000 Received: from aurel32 by cvs.savannah.gnu.org with local (Exim 4.63) (envelope-from ) id 1JjJqr-00087x-DJ for qemu-devel@nongnu.org; Tue, 08 Apr 2008 19:51:29 +0000 MIME-Version: 1.0 Errors-To: aurel32 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From: Aurelien Jarno Message-Id: Date: Tue, 08 Apr 2008 19:51:29 +0000 Subject: [Qemu-devel] [4180] 3DNow! instruction set emulation Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Revision: 4180 http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4180 Author: aurel32 Date: 2008-04-08 19:51:29 +0000 (Tue, 08 Apr 2008) Log Message: ----------- 3DNow! 
instruction set emulation (Michael Tross) Modified Paths: -------------- trunk/target-i386/cpu.h trunk/target-i386/helper2.c trunk/target-i386/ops_sse.h trunk/target-i386/translate.c Modified: trunk/target-i386/cpu.h =================================================================== --- trunk/target-i386/cpu.h 2008-04-08 19:51:21 UTC (rev 4179) +++ trunk/target-i386/cpu.h 2008-04-08 19:51:29 UTC (rev 4180) @@ -428,8 +428,9 @@ typedef union { uint8_t _b[8]; - uint16_t _w[2]; - uint32_t _l[1]; + uint16_t _w[4]; + uint32_t _l[2]; + float32 _s[2]; uint64_t q; } MMXReg; @@ -444,6 +445,7 @@ #define MMX_B(n) _b[7 - (n)] #define MMX_W(n) _w[3 - (n)] #define MMX_L(n) _l[1 - (n)] +#define MMX_S(n) _s[1 - (n)] #else #define XMM_B(n) _b[n] #define XMM_W(n) _w[n] @@ -455,6 +457,7 @@ #define MMX_B(n) _b[n] #define MMX_W(n) _w[n] #define MMX_L(n) _l[n] +#define MMX_S(n) _s[n] #endif #define MMX_Q(n) q @@ -520,6 +523,7 @@ int64_t i64; } fp_convert; + float_status mmx_status; /* for 3DNow! float ops */ float_status sse_status; uint32_t mxcsr; XMMReg xmm_regs[CPU_NB_REGS]; Modified: trunk/target-i386/helper2.c =================================================================== --- trunk/target-i386/helper2.c 2008-04-08 19:51:21 UTC (rev 4179) +++ trunk/target-i386/helper2.c 2008-04-08 19:51:29 UTC (rev 4180) @@ -150,7 +150,8 @@ CPUID_PSE36, .ext_features = CPUID_EXT_SSE3, .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | - CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, + CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | + CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT, .ext3_features = CPUID_EXT3_SVM, .xlevel = 0x8000000A, }, @@ -201,6 +202,19 @@ .features = 0x0383F9FF, .xlevel = 0, }, + { + .name = "athlon", + .level = 2, + .vendor1 = 0x68747541, /* "Auth" */ + .vendor2 = 0x69746e65, /* "enti" */ + .vendor3 = 0x444d4163, /* "cAMD" */ + .family = 6, + .model = 2, + .stepping = 3, + .features = PPRO_FEATURES | PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR | CPUID_MCA, + 
.ext2_features = (PPRO_FEATURES & 0x0183F3FF) | CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT, + .xlevel = 0x80000008, + }, }; static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) Modified: trunk/target-i386/ops_sse.h =================================================================== --- trunk/target-i386/ops_sse.h 2008-04-08 19:51:21 UTC (rev 4179) +++ trunk/target-i386/ops_sse.h 2008-04-08 19:51:29 UTC (rev 4180) @@ -1,5 +1,5 @@ /* - * MMX/SSE/SSE2/PNI support + * MMX/3DNow!/SSE/SSE2/SSE3/PNI support * * Copyright (c) 2005 Fabrice Bellard * @@ -409,6 +409,7 @@ #define FCMPEQ(a, b) (a) == (b) ? -1 : 0 #define FMULLW(a, b) (a) * (b) +#define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16 #define FMULHUW(a, b) (a) * (b) >> 16 #define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16 @@ -455,6 +456,9 @@ SSE_OP_L(op_pcmpeql, FCMPEQ) SSE_OP_W(op_pmullw, FMULLW) +#if SHIFT == 0 +SSE_OP_W(op_pmulhrw, FMULHRW) +#endif SSE_OP_W(op_pmulhuw, FMULHUW) SSE_OP_W(op_pmulhw, FMULHW) @@ -1383,6 +1387,175 @@ UNPCK_OP(l, 0) UNPCK_OP(h, 1) +/* 3DNow! 
float ops */ +#if SHIFT == 0 +void OPPROTO op_pi2fd(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status); + d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status); +} + +void OPPROTO op_pi2fw(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status); + d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status); +} + +void OPPROTO op_pf2id(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status); + d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status); +} + +void OPPROTO op_pf2iw(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status)); + d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status)); +} + +void OPPROTO op_pfacc(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + MMXReg r; + r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void OPPROTO op_pfadd(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void OPPROTO op_pfcmpeq(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? 
-1 : 0; + d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0; +} + +void OPPROTO op_pfcmpge(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; +} + +void OPPROTO op_pfcmpgt(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; + d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; +} + +void OPPROTO op_pfmax(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status)) + d->MMX_S(0) = s->MMX_S(0); + if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status)) + d->MMX_S(1) = s->MMX_S(1); +} + +void OPPROTO op_pfmin(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status)) + d->MMX_S(0) = s->MMX_S(0); + if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status)) + d->MMX_S(1) = s->MMX_S(1); +} + +void OPPROTO op_pfmul(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void OPPROTO op_pfnacc(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + MMXReg r; + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void OPPROTO op_pfpnacc(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = 
(MMXReg *)((char *)env + PARAM2); + MMXReg r; + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + *d = r; +} + +void OPPROTO op_pfrcp(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = approx_rcp(s->MMX_S(0)); + d->MMX_S(1) = d->MMX_S(0); +} + +void OPPROTO op_pfrsqrt(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff; + d->MMX_S(1) = approx_rsqrt(d->MMX_S(1)); + d->MMX_L(1) |= s->MMX_L(0) & 0x80000000; + d->MMX_L(0) = d->MMX_L(1); +} + +void OPPROTO op_pfsub(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); +} + +void OPPROTO op_pfsubr(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status); + d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status); +} + +void OPPROTO op_pswapd(void) +{ + MMXReg *d = (MMXReg *)((char *)env + PARAM1); + MMXReg *s = (MMXReg *)((char *)env + PARAM2); + MMXReg r; + r.MMX_L(0) = s->MMX_L(1); + r.MMX_L(1) = s->MMX_L(0); + *d = r; +} +#endif + #undef SHIFT #undef XMM_ONLY #undef Reg Modified: trunk/target-i386/translate.c =================================================================== --- trunk/target-i386/translate.c 2008-04-08 19:51:21 UTC (rev 4179) +++ trunk/target-i386/translate.c 2008-04-08 19:51:29 UTC (rev 4180) @@ -2408,12 +2408,16 @@ }; #define SSE_SPECIAL ((GenOpFunc2 *)1) +#define SSE_DUMMY ((GenOpFunc2 *)2) #define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm } #define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \ 
gen_op_ ## x ## ss, gen_op_ ## x ## sd, } static GenOpFunc2 *sse_op_table1[256][4] = { + /* 3DNow! extensions */ + [0x0e] = { SSE_DUMMY }, /* femms */ + [0x0f] = { SSE_DUMMY }, /* pf... */ /* pure SSE operations */ [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ @@ -2480,7 +2484,7 @@ [0x74] = MMX_OP2(pcmpeqb), [0x75] = MMX_OP2(pcmpeqw), [0x76] = MMX_OP2(pcmpeql), - [0x77] = { SSE_SPECIAL }, /* emms */ + [0x77] = { SSE_DUMMY }, /* emms */ [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps }, [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps }, [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ @@ -2577,6 +2581,33 @@ SSE_FOP(cmpord), }; +static GenOpFunc2 *sse_op_table5[256] = { + [0x0c] = gen_op_pi2fw, + [0x0d] = gen_op_pi2fd, + [0x1c] = gen_op_pf2iw, + [0x1d] = gen_op_pf2id, + [0x8a] = gen_op_pfnacc, + [0x8e] = gen_op_pfpnacc, + [0x90] = gen_op_pfcmpge, + [0x94] = gen_op_pfmin, + [0x96] = gen_op_pfrcp, + [0x97] = gen_op_pfrsqrt, + [0x9a] = gen_op_pfsub, + [0x9e] = gen_op_pfadd, + [0xa0] = gen_op_pfcmpgt, + [0xa4] = gen_op_pfmax, + [0xa6] = gen_op_movq, /* pfrcpit1; no need to actually increase precision */ + [0xa7] = gen_op_movq, /* pfrsqit1 */ + [0xaa] = gen_op_pfsubr, + [0xae] = gen_op_pfacc, + [0xb0] = gen_op_pfcmpeq, + [0xb4] = gen_op_pfmul, + [0xb6] = gen_op_movq, /* pfrcpit2 */ + [0xb7] = gen_op_pmulhrw_mmx, + [0xbb] = gen_op_pswapd, + [0xbf] = gen_op_pavgb_mmx /* pavgusb */ +}; + static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) { int b1, op1_offset, op2_offset, is_xmm, val, ot; @@ -2596,7 +2627,7 @@ sse_op2 = sse_op_table1[b][b1]; if (!sse_op2) goto illegal_op; - if (b <= 0x5f || b == 0xc6 || b == 0xc2) { + if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) { is_xmm = 1; } else { if (b1 == 0) { @@ -2618,8 +2649,8 @@ } if (is_xmm && !(s->flags & 
HF_OSFXSR_MASK)) goto illegal_op; - if (b == 0x77) { - /* emms */ + if (b == 0x77 || b == 0x0e) { + /* emms or femms */ gen_op_emms(); return; } @@ -3151,6 +3182,13 @@ } } switch(b) { + case 0x0f: /* 3DNow! data insns */ + val = ldub_code(s->pc++); + sse_op2 = sse_op_table5[val]; + if (!sse_op2) + goto illegal_op; + sse_op2(op1_offset, op2_offset); + break; case 0x70: /* pshufx insn */ case 0xc6: /* pshufx insn */ val = ldub_code(s->pc++); @@ -6148,7 +6186,7 @@ gen_eob(s); } break; - /* MMX/SSE/SSE2/PNI support */ + /* MMX/3DNow!/SSE/SSE2/SSE3 support */ case 0x1c3: /* MOVNTI reg, mem */ if (!(s->cpuid_features & CPUID_SSE2)) goto illegal_op; @@ -6214,6 +6252,7 @@ case 7: /* sfence / clflush */ if ((modrm & 0xc7) == 0xc0) { /* sfence */ + /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */ if (!(s->cpuid_features & CPUID_SSE)) goto illegal_op; } else { @@ -6227,8 +6266,11 @@ goto illegal_op; } break; - case 0x10d: /* prefetch */ + case 0x10d: /* 3DNow! prefetch(w) */ modrm = ldub_code(s->pc++); + mod = (modrm >> 6) & 3; + if (mod == 3) + goto illegal_op; gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); /* ignore for now */ break; @@ -6245,6 +6287,9 @@ gen_op_rsm(); gen_eob(s); break; + case 0x10e ... 0x10f: + /* 3DNow! instructions, ignore prefixes */ + s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); case 0x110 ... 0x117: case 0x128 ... 0x12f: case 0x150 ... 0x177: