From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1JjLlB-0001K8-37 for qemu-devel@nongnu.org; Tue, 08 Apr 2008 17:53:45 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1JjLlA-0001Im-2X for qemu-devel@nongnu.org; Tue, 08 Apr 2008 17:53:44 -0400 Received: from [199.232.76.173] (helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1JjLl9-0001IV-QX for qemu-devel@nongnu.org; Tue, 08 Apr 2008 17:53:43 -0400 Received: from relay3-v.mail.gandi.net ([217.70.178.77]) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1JjLl9-0007e1-2f for qemu-devel@nongnu.org; Tue, 08 Apr 2008 17:53:43 -0400 Received: from localhost (mfilter1-c.gandi.net [217.70.182.21]) by relay3-v.mail.gandi.net (Postfix) with ESMTP id 65B3DBA0B for ; Tue, 8 Apr 2008 23:53:40 +0200 (CEST) Received: from relay3-v.mail.gandi.net ([217.70.178.77]) by localhost (mfilter1-c.mgt.gandi.net [217.70.182.21]) (amavisd-new, port 10024) with ESMTP id L23kTm1Qp9ID for ; Tue, 8 Apr 2008 23:53:33 +0200 (CEST) Received: from [84.102.211.19] (19.211.102-84.rev.gaoland.net [84.102.211.19]) by relay3-v.mail.gandi.net (Postfix) with ESMTP id B9EB7B9FF for ; Tue, 8 Apr 2008 23:53:32 +0200 (CEST) Message-ID: <47FBE934.2040501@bellard.org> Date: Tue, 08 Apr 2008 23:52:52 +0200 From: Fabrice Bellard MIME-Version: 1.0 Subject: Re: [Qemu-devel] [4180] 3DNow! instruction set emulation References: In-Reply-To: Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 7bit Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Hi, Without a proper CPUID test for 3Dnow instruction support, this patch is incomplete to say the least because it adds 3Dnow instructions for all CPUs. Fabrice. 
Aurelien Jarno wrote: > Revision: 4180 > http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4180 > Author: aurel32 > Date: 2008-04-08 19:51:29 +0000 (Tue, 08 Apr 2008) > > Log Message: > ----------- > 3DNow! instruction set emulation > > (Michael Tross) > > Modified Paths: > -------------- > trunk/target-i386/cpu.h > trunk/target-i386/helper2.c > trunk/target-i386/ops_sse.h > trunk/target-i386/translate.c > > Modified: trunk/target-i386/cpu.h > =================================================================== > --- trunk/target-i386/cpu.h 2008-04-08 19:51:21 UTC (rev 4179) > +++ trunk/target-i386/cpu.h 2008-04-08 19:51:29 UTC (rev 4180) > @@ -428,8 +428,9 @@ > > typedef union { > uint8_t _b[8]; > - uint16_t _w[2]; > - uint32_t _l[1]; > + uint16_t _w[4]; > + uint32_t _l[2]; > + float32 _s[2]; > uint64_t q; > } MMXReg; > > @@ -444,6 +445,7 @@ > #define MMX_B(n) _b[7 - (n)] > #define MMX_W(n) _w[3 - (n)] > #define MMX_L(n) _l[1 - (n)] > +#define MMX_S(n) _s[1 - (n)] > #else > #define XMM_B(n) _b[n] > #define XMM_W(n) _w[n] > @@ -455,6 +457,7 @@ > #define MMX_B(n) _b[n] > #define MMX_W(n) _w[n] > #define MMX_L(n) _l[n] > +#define MMX_S(n) _s[n] > #endif > #define MMX_Q(n) q > > @@ -520,6 +523,7 @@ > int64_t i64; > } fp_convert; > > + float_status mmx_status; /* for 3DNow! 
float ops */ > float_status sse_status; > uint32_t mxcsr; > XMMReg xmm_regs[CPU_NB_REGS]; > > Modified: trunk/target-i386/helper2.c > =================================================================== > --- trunk/target-i386/helper2.c 2008-04-08 19:51:21 UTC (rev 4179) > +++ trunk/target-i386/helper2.c 2008-04-08 19:51:29 UTC (rev 4180) > @@ -150,7 +150,8 @@ > CPUID_PSE36, > .ext_features = CPUID_EXT_SSE3, > .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | > - CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, > + CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | > + CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT, > .ext3_features = CPUID_EXT3_SVM, > .xlevel = 0x8000000A, > }, > @@ -201,6 +202,19 @@ > .features = 0x0383F9FF, > .xlevel = 0, > }, > + { > + .name = "athlon", > + .level = 2, > + .vendor1 = 0x68747541, /* "Auth" */ > + .vendor2 = 0x69746e65, /* "enti" */ > + .vendor3 = 0x444d4163, /* "cAMD" */ > + .family = 6, > + .model = 2, > + .stepping = 3, > + .features = PPRO_FEATURES | PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR | CPUID_MCA, > + .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT, > + .xlevel = 0x80000008, > + }, > }; > > static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model) > > Modified: trunk/target-i386/ops_sse.h > =================================================================== > --- trunk/target-i386/ops_sse.h 2008-04-08 19:51:21 UTC (rev 4179) > +++ trunk/target-i386/ops_sse.h 2008-04-08 19:51:29 UTC (rev 4180) > @@ -1,5 +1,5 @@ > /* > - * MMX/SSE/SSE2/PNI support > + * MMX/3DNow!/SSE/SSE2/SSE3/PNI support > * > * Copyright (c) 2005 Fabrice Bellard > * > @@ -409,6 +409,7 @@ > #define FCMPEQ(a, b) (a) == (b) ? 
-1 : 0 > > #define FMULLW(a, b) (a) * (b) > +#define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16 > #define FMULHUW(a, b) (a) * (b) >> 16 > #define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16 > > @@ -455,6 +456,9 @@ > SSE_OP_L(op_pcmpeql, FCMPEQ) > > SSE_OP_W(op_pmullw, FMULLW) > +#if SHIFT == 0 > +SSE_OP_W(op_pmulhrw, FMULHRW) > +#endif > SSE_OP_W(op_pmulhuw, FMULHUW) > SSE_OP_W(op_pmulhw, FMULHW) > > @@ -1383,6 +1387,175 @@ > UNPCK_OP(l, 0) > UNPCK_OP(h, 1) > > +/* 3DNow! float ops */ > +#if SHIFT == 0 > +void OPPROTO op_pi2fd(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status); > + d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status); > +} > + > +void OPPROTO op_pi2fw(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status); > + d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status); > +} > + > +void OPPROTO op_pf2id(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status); > + d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status); > +} > + > +void OPPROTO op_pf2iw(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status)); > + d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status)); > +} > + > +void OPPROTO op_pfacc(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + MMXReg r; > + r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); > + r.MMX_S(1) = float32_add(s->MMX_S(0), 
s->MMX_S(1), &env->mmx_status); > + *d = r; > +} > + > +void OPPROTO op_pfadd(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); > + d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); > +} > + > +void OPPROTO op_pfcmpeq(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0; > + d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0; > +} > + > +void OPPROTO op_pfcmpge(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; > + d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0; > +} > + > +void OPPROTO op_pfcmpgt(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0; > + d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? 
-1 : 0; > +} > + > +void OPPROTO op_pfmax(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status)) > + d->MMX_S(0) = s->MMX_S(0); > + if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status)) > + d->MMX_S(1) = s->MMX_S(1); > +} > + > +void OPPROTO op_pfmin(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status)) > + d->MMX_S(0) = s->MMX_S(0); > + if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status)) > + d->MMX_S(1) = s->MMX_S(1); > +} > + > +void OPPROTO op_pfmul(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); > + d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); > +} > + > +void OPPROTO op_pfnacc(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + MMXReg r; > + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); > + r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); > + *d = r; > +} > + > +void OPPROTO op_pfpnacc(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + MMXReg r; > + r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); > + r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); > + *d = r; > +} > + > +void OPPROTO op_pfrcp(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_S(0) = approx_rcp(s->MMX_S(0)); > + d->MMX_S(1) = d->MMX_S(0); > +} > + > +void OPPROTO op_pfrsqrt(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_L(1) = 
s->MMX_L(0) & 0x7fffffff; > + d->MMX_S(1) = approx_rsqrt(d->MMX_S(1)); > + d->MMX_L(1) |= s->MMX_L(0) & 0x80000000; > + d->MMX_L(0) = d->MMX_L(1); > +} > + > +void OPPROTO op_pfsub(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status); > + d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status); > +} > + > +void OPPROTO op_pfsubr(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status); > + d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status); > +} > + > +void OPPROTO op_pswapd(void) > +{ > + MMXReg *d = (MMXReg *)((char *)env + PARAM1); > + MMXReg *s = (MMXReg *)((char *)env + PARAM2); > + MMXReg r; > + r.MMX_L(0) = s->MMX_L(1); > + r.MMX_L(1) = s->MMX_L(0); > + *d = r; > +} > +#endif > + > #undef SHIFT > #undef XMM_ONLY > #undef Reg > > Modified: trunk/target-i386/translate.c > =================================================================== > --- trunk/target-i386/translate.c 2008-04-08 19:51:21 UTC (rev 4179) > +++ trunk/target-i386/translate.c 2008-04-08 19:51:29 UTC (rev 4180) > @@ -2408,12 +2408,16 @@ > }; > > #define SSE_SPECIAL ((GenOpFunc2 *)1) > +#define SSE_DUMMY ((GenOpFunc2 *)2) > > #define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm } > #define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \ > gen_op_ ## x ## ss, gen_op_ ## x ## sd, } > > static GenOpFunc2 *sse_op_table1[256][4] = { > + /* 3DNow! extensions */ > + [0x0e] = { SSE_DUMMY }, /* femms */ > + [0x0f] = { SSE_DUMMY }, /* pf... 
*/ > /* pure SSE operations */ > [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ > [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ > @@ -2480,7 +2484,7 @@ > [0x74] = MMX_OP2(pcmpeqb), > [0x75] = MMX_OP2(pcmpeqw), > [0x76] = MMX_OP2(pcmpeql), > - [0x77] = { SSE_SPECIAL }, /* emms */ > + [0x77] = { SSE_DUMMY }, /* emms */ > [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps }, > [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps }, > [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ > @@ -2577,6 +2581,33 @@ > SSE_FOP(cmpord), > }; > > +static GenOpFunc2 *sse_op_table5[256] = { > + [0x0c] = gen_op_pi2fw, > + [0x0d] = gen_op_pi2fd, > + [0x1c] = gen_op_pf2iw, > + [0x1d] = gen_op_pf2id, > + [0x8a] = gen_op_pfnacc, > + [0x8e] = gen_op_pfpnacc, > + [0x90] = gen_op_pfcmpge, > + [0x94] = gen_op_pfmin, > + [0x96] = gen_op_pfrcp, > + [0x97] = gen_op_pfrsqrt, > + [0x9a] = gen_op_pfsub, > + [0x9e] = gen_op_pfadd, > + [0xa0] = gen_op_pfcmpgt, > + [0xa4] = gen_op_pfmax, > + [0xa6] = gen_op_movq, /* pfrcpit1; no need to actually increase precision */ > + [0xa7] = gen_op_movq, /* pfrsqit1 */ > + [0xaa] = gen_op_pfsubr, > + [0xae] = gen_op_pfacc, > + [0xb0] = gen_op_pfcmpeq, > + [0xb4] = gen_op_pfmul, > + [0xb6] = gen_op_movq, /* pfrcpit2 */ > + [0xb7] = gen_op_pmulhrw_mmx, > + [0xbb] = gen_op_pswapd, > + [0xbf] = gen_op_pavgb_mmx /* pavgusb */ > +}; > + > static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) > { > int b1, op1_offset, op2_offset, is_xmm, val, ot; > @@ -2596,7 +2627,7 @@ > sse_op2 = sse_op_table1[b][b1]; > if (!sse_op2) > goto illegal_op; > - if (b <= 0x5f || b == 0xc6 || b == 0xc2) { > + if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) { > is_xmm = 1; > } else { > if (b1 == 0) { > @@ -2618,8 +2649,8 @@ > } > if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) > goto illegal_op; > - if (b == 0x77) { > - /* emms */ 
> + if (b == 0x77 || b == 0x0e) { > + /* emms or femms */ > gen_op_emms(); > return; > } > @@ -3151,6 +3182,13 @@ > } > } > switch(b) { > + case 0x0f: /* 3DNow! data insns */ > + val = ldub_code(s->pc++); > + sse_op2 = sse_op_table5[val]; > + if (!sse_op2) > + goto illegal_op; > + sse_op2(op1_offset, op2_offset); > + break; > case 0x70: /* pshufx insn */ > case 0xc6: /* pshufx insn */ > val = ldub_code(s->pc++); > @@ -6148,7 +6186,7 @@ > gen_eob(s); > } > break; > - /* MMX/SSE/SSE2/PNI support */ > + /* MMX/3DNow!/SSE/SSE2/SSE3 support */ > case 0x1c3: /* MOVNTI reg, mem */ > if (!(s->cpuid_features & CPUID_SSE2)) > goto illegal_op; > @@ -6214,6 +6252,7 @@ > case 7: /* sfence / clflush */ > if ((modrm & 0xc7) == 0xc0) { > /* sfence */ > + /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_EMMX */ > if (!(s->cpuid_features & CPUID_SSE)) > goto illegal_op; > } else { > @@ -6227,8 +6266,11 @@ > goto illegal_op; > } > break; > - case 0x10d: /* prefetch */ > + case 0x10d: /* 3DNow! prefetch(w) */ > modrm = ldub_code(s->pc++); > + mod = (modrm >> 6) & 3; > + if (mod == 3) > + goto illegal_op; > gen_lea_modrm(s, modrm, &reg_addr, &offset_addr); > /* ignore for now */ > break; > @@ -6245,6 +6287,9 @@ > gen_op_rsm(); > gen_eob(s); > break; > + case 0x10e ... 0x10f: > + /* 3DNow! instructions, ignore prefixes */ > + s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); > case 0x110 ... 0x117: > case 0x128 ... 0x12f: > case 0x150 ... 0x177: > > > > >