All of lore.kernel.org
 help / color / mirror / Atom feed
From: Fabrice Bellard <fabrice@bellard.org>
To: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [4180] 3DNow! instruction set emulation
Date: Tue, 08 Apr 2008 23:52:52 +0200	[thread overview]
Message-ID: <47FBE934.2040501@bellard.org> (raw)
In-Reply-To: <E1JjJqr-00087x-DJ@cvs.savannah.gnu.org>

Hi,

Without a proper CPUID test for 3DNow! instruction support, this patch is
incomplete, to say the least, because it adds 3DNow! instructions for all CPUs.

Fabrice.

Aurelien Jarno wrote:
> Revision: 4180
>           http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4180
> Author:   aurel32
> Date:     2008-04-08 19:51:29 +0000 (Tue, 08 Apr 2008)
> 
> Log Message:
> -----------
> 3DNow! instruction set emulation
> 
> (Michael Tross)
> 
> Modified Paths:
> --------------
>     trunk/target-i386/cpu.h
>     trunk/target-i386/helper2.c
>     trunk/target-i386/ops_sse.h
>     trunk/target-i386/translate.c
> 
> Modified: trunk/target-i386/cpu.h
> ===================================================================
> --- trunk/target-i386/cpu.h	2008-04-08 19:51:21 UTC (rev 4179)
> +++ trunk/target-i386/cpu.h	2008-04-08 19:51:29 UTC (rev 4180)
> @@ -428,8 +428,9 @@
>  
>  typedef union {
>      uint8_t _b[8];
> -    uint16_t _w[2];
> -    uint32_t _l[1];
> +    uint16_t _w[4];
> +    uint32_t _l[2];
> +    float32 _s[2];
>      uint64_t q;
>  } MMXReg;
>  
> @@ -444,6 +445,7 @@
>  #define MMX_B(n) _b[7 - (n)]
>  #define MMX_W(n) _w[3 - (n)]
>  #define MMX_L(n) _l[1 - (n)]
> +#define MMX_S(n) _s[1 - (n)]
>  #else
>  #define XMM_B(n) _b[n]
>  #define XMM_W(n) _w[n]
> @@ -455,6 +457,7 @@
>  #define MMX_B(n) _b[n]
>  #define MMX_W(n) _w[n]
>  #define MMX_L(n) _l[n]
> +#define MMX_S(n) _s[n]
>  #endif
>  #define MMX_Q(n) q
>  
> @@ -520,6 +523,7 @@
>          int64_t i64;
>      } fp_convert;
>  
> +    float_status mmx_status; /* for 3DNow! float ops */
>      float_status sse_status;
>      uint32_t mxcsr;
>      XMMReg xmm_regs[CPU_NB_REGS];
> 
> Modified: trunk/target-i386/helper2.c
> ===================================================================
> --- trunk/target-i386/helper2.c	2008-04-08 19:51:21 UTC (rev 4179)
> +++ trunk/target-i386/helper2.c	2008-04-08 19:51:29 UTC (rev 4180)
> @@ -150,7 +150,8 @@
>              CPUID_PSE36,
>          .ext_features = CPUID_EXT_SSE3,
>          .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | 
> -            CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
> +            CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX |
> +            CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
>          .ext3_features = CPUID_EXT3_SVM,
>          .xlevel = 0x8000000A,
>      },
> @@ -201,6 +202,19 @@
>          .features = 0x0383F9FF,
>          .xlevel = 0,
>      },
> +    {
> +        .name = "athlon",
> +        .level = 2,
> +        .vendor1 = 0x68747541, /* "Auth" */
> +        .vendor2 = 0x69746e65, /* "enti" */
> +        .vendor3 = 0x444d4163, /* "cAMD" */
> +        .family = 6,
> +        .model = 2,
> +        .stepping = 3,
> +        .features = PPRO_FEATURES | PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR | CPUID_MCA,
> +        .ext2_features = (PPRO_FEATURES & 0x0183F3FF) | CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
> +        .xlevel = 0x80000008,
> +    },
>  };
>  
>  static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
> 
> Modified: trunk/target-i386/ops_sse.h
> ===================================================================
> --- trunk/target-i386/ops_sse.h	2008-04-08 19:51:21 UTC (rev 4179)
> +++ trunk/target-i386/ops_sse.h	2008-04-08 19:51:29 UTC (rev 4180)
> @@ -1,5 +1,5 @@
>  /*
> - *  MMX/SSE/SSE2/PNI support
> + *  MMX/3DNow!/SSE/SSE2/SSE3/PNI support
>   *
>   *  Copyright (c) 2005 Fabrice Bellard
>   *
> @@ -409,6 +409,7 @@
>  #define FCMPEQ(a, b) (a) == (b) ? -1 : 0
>  
>  #define FMULLW(a, b) (a) * (b)
> +#define FMULHRW(a, b) ((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16
>  #define FMULHUW(a, b) (a) * (b) >> 16
>  #define FMULHW(a, b) (int16_t)(a) * (int16_t)(b) >> 16
>  
> @@ -455,6 +456,9 @@
>  SSE_OP_L(op_pcmpeql, FCMPEQ)
>  
>  SSE_OP_W(op_pmullw, FMULLW)
> +#if SHIFT == 0
> +SSE_OP_W(op_pmulhrw, FMULHRW)
> +#endif
>  SSE_OP_W(op_pmulhuw, FMULHUW)
>  SSE_OP_W(op_pmulhw, FMULHW)
>  
> @@ -1383,6 +1387,175 @@
>  UNPCK_OP(l, 0)
>  UNPCK_OP(h, 1)
>  
> +/* 3DNow! float ops */
> +#if SHIFT == 0
> +void OPPROTO op_pi2fd(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = int32_to_float32(s->MMX_L(0), &env->mmx_status);
> +    d->MMX_S(1) = int32_to_float32(s->MMX_L(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pi2fw(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = int32_to_float32((int16_t)s->MMX_W(0), &env->mmx_status);
> +    d->MMX_S(1) = int32_to_float32((int16_t)s->MMX_W(2), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pf2id(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(0) = float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status);
> +    d->MMX_L(1) = float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pf2iw(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(0) = satsw(float32_to_int32_round_to_zero(s->MMX_S(0), &env->mmx_status));
> +    d->MMX_L(1) = satsw(float32_to_int32_round_to_zero(s->MMX_S(1), &env->mmx_status));
> +}
> +
> +void OPPROTO op_pfacc(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    MMXReg r;
> +    r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
> +    r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
> +    *d = r;
> +}
> +
> +void OPPROTO op_pfadd(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = float32_add(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
> +    d->MMX_S(1) = float32_add(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pfcmpeq(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(0) = float32_eq(d->MMX_S(0), s->MMX_S(0), &env->mmx_status) ? -1 : 0;
> +    d->MMX_L(1) = float32_eq(d->MMX_S(1), s->MMX_S(1), &env->mmx_status) ? -1 : 0;
> +}
> +
> +void OPPROTO op_pfcmpge(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(0) = float32_le(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
> +    d->MMX_L(1) = float32_le(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
> +}
> +
> +void OPPROTO op_pfcmpgt(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(0) = float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status) ? -1 : 0;
> +    d->MMX_L(1) = float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status) ? -1 : 0;
> +}
> +
> +void OPPROTO op_pfmax(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    if (float32_lt(d->MMX_S(0), s->MMX_S(0), &env->mmx_status))
> +        d->MMX_S(0) = s->MMX_S(0);
> +    if (float32_lt(d->MMX_S(1), s->MMX_S(1), &env->mmx_status))
> +        d->MMX_S(1) = s->MMX_S(1);
> +}
> +
> +void OPPROTO op_pfmin(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    if (float32_lt(s->MMX_S(0), d->MMX_S(0), &env->mmx_status))
> +        d->MMX_S(0) = s->MMX_S(0);
> +    if (float32_lt(s->MMX_S(1), d->MMX_S(1), &env->mmx_status))
> +        d->MMX_S(1) = s->MMX_S(1);
> +}
> +
> +void OPPROTO op_pfmul(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = float32_mul(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
> +    d->MMX_S(1) = float32_mul(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pfnacc(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    MMXReg r;
> +    r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
> +    r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
> +    *d = r;
> +}
> +
> +void OPPROTO op_pfpnacc(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    MMXReg r;
> +    r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
> +    r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
> +    *d = r;
> +}
> +
> +void OPPROTO op_pfrcp(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = approx_rcp(s->MMX_S(0));
> +    d->MMX_S(1) = d->MMX_S(0);
> +}
> +
> +void OPPROTO op_pfrsqrt(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_L(1) = s->MMX_L(0) & 0x7fffffff;
> +    d->MMX_S(1) = approx_rsqrt(d->MMX_S(1));
> +    d->MMX_L(1) |= s->MMX_L(0) & 0x80000000;
> +    d->MMX_L(0) = d->MMX_L(1);
> +}
> +
> +void OPPROTO op_pfsub(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = float32_sub(d->MMX_S(0), s->MMX_S(0), &env->mmx_status);
> +    d->MMX_S(1) = float32_sub(d->MMX_S(1), s->MMX_S(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pfsubr(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    d->MMX_S(0) = float32_sub(s->MMX_S(0), d->MMX_S(0), &env->mmx_status);
> +    d->MMX_S(1) = float32_sub(s->MMX_S(1), d->MMX_S(1), &env->mmx_status);
> +}
> +
> +void OPPROTO op_pswapd(void)
> +{
> +    MMXReg *d = (MMXReg *)((char *)env + PARAM1);
> +    MMXReg *s = (MMXReg *)((char *)env + PARAM2);
> +    MMXReg r;
> +    r.MMX_L(0) = s->MMX_L(1);
> +    r.MMX_L(1) = s->MMX_L(0);
> +    *d = r;
> +}
> +#endif
> +
>  #undef SHIFT
>  #undef XMM_ONLY
>  #undef Reg
> 
> Modified: trunk/target-i386/translate.c
> ===================================================================
> --- trunk/target-i386/translate.c	2008-04-08 19:51:21 UTC (rev 4179)
> +++ trunk/target-i386/translate.c	2008-04-08 19:51:29 UTC (rev 4180)
> @@ -2408,12 +2408,16 @@
>  };
>  
>  #define SSE_SPECIAL ((GenOpFunc2 *)1)
> +#define SSE_DUMMY ((GenOpFunc2 *)2)
>  
>  #define MMX_OP2(x) { gen_op_ ## x ## _mmx, gen_op_ ## x ## _xmm }
>  #define SSE_FOP(x) { gen_op_ ## x ## ps, gen_op_ ## x ## pd, \
>                       gen_op_ ## x ## ss, gen_op_ ## x ## sd, }
>  
>  static GenOpFunc2 *sse_op_table1[256][4] = {
> +    /* 3DNow! extensions */
> +    [0x0e] = { SSE_DUMMY }, /* femms */
> +    [0x0f] = { SSE_DUMMY }, /* pf... */
>      /* pure SSE operations */
>      [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
>      [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */
> @@ -2480,7 +2484,7 @@
>      [0x74] = MMX_OP2(pcmpeqb),
>      [0x75] = MMX_OP2(pcmpeqw),
>      [0x76] = MMX_OP2(pcmpeql),
> -    [0x77] = { SSE_SPECIAL }, /* emms */
> +    [0x77] = { SSE_DUMMY }, /* emms */
>      [0x7c] = { NULL, gen_op_haddpd, NULL, gen_op_haddps },
>      [0x7d] = { NULL, gen_op_hsubpd, NULL, gen_op_hsubps },
>      [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */
> @@ -2577,6 +2581,33 @@
>      SSE_FOP(cmpord),
>  };
>  
> +static GenOpFunc2 *sse_op_table5[256] = {
> +    [0x0c] = gen_op_pi2fw,
> +    [0x0d] = gen_op_pi2fd,
> +    [0x1c] = gen_op_pf2iw,
> +    [0x1d] = gen_op_pf2id,
> +    [0x8a] = gen_op_pfnacc,
> +    [0x8e] = gen_op_pfpnacc,
> +    [0x90] = gen_op_pfcmpge,
> +    [0x94] = gen_op_pfmin,
> +    [0x96] = gen_op_pfrcp,
> +    [0x97] = gen_op_pfrsqrt,
> +    [0x9a] = gen_op_pfsub,
> +    [0x9e] = gen_op_pfadd,
> +    [0xa0] = gen_op_pfcmpgt,
> +    [0xa4] = gen_op_pfmax,
> +    [0xa6] = gen_op_movq, /* pfrcpit1; no need to actually increase precision */
> +    [0xa7] = gen_op_movq, /* pfrsqit1 */
> +    [0xaa] = gen_op_pfsubr,
> +    [0xae] = gen_op_pfacc,
> +    [0xb0] = gen_op_pfcmpeq,
> +    [0xb4] = gen_op_pfmul,
> +    [0xb6] = gen_op_movq, /* pfrcpit2 */
> +    [0xb7] = gen_op_pmulhrw_mmx,
> +    [0xbb] = gen_op_pswapd,
> +    [0xbf] = gen_op_pavgb_mmx /* pavgusb */
> +};
> +
>  static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r)
>  {
>      int b1, op1_offset, op2_offset, is_xmm, val, ot;
> @@ -2596,7 +2627,7 @@
>      sse_op2 = sse_op_table1[b][b1];
>      if (!sse_op2)
>          goto illegal_op;
> -    if (b <= 0x5f || b == 0xc6 || b == 0xc2) {
> +    if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) {
>          is_xmm = 1;
>      } else {
>          if (b1 == 0) {
> @@ -2618,8 +2649,8 @@
>      }
>      if (is_xmm && !(s->flags & HF_OSFXSR_MASK))
>          goto illegal_op;
> -    if (b == 0x77) {
> -        /* emms */
> +    if (b == 0x77 || b == 0x0e) {
> +        /* emms or femms */
>          gen_op_emms();
>          return;
>      }
> @@ -3151,6 +3182,13 @@
>              }
>          }
>          switch(b) {
> +        case 0x0f: /* 3DNow! data insns */
> +            val = ldub_code(s->pc++);
> +            sse_op2 = sse_op_table5[val];
> +            if (!sse_op2)
> +                goto illegal_op;
> +            sse_op2(op1_offset, op2_offset);
> +            break;
>          case 0x70: /* pshufx insn */
>          case 0xc6: /* pshufx insn */
>              val = ldub_code(s->pc++);
> @@ -6148,7 +6186,7 @@
>              gen_eob(s);
>          }
>          break;
> -    /* MMX/SSE/SSE2/PNI support */
> +    /* MMX/3DNow!/SSE/SSE2/SSE3 support */
>      case 0x1c3: /* MOVNTI reg, mem */
>          if (!(s->cpuid_features & CPUID_SSE2))
>              goto illegal_op;
> @@ -6214,6 +6252,7 @@
>          case 7: /* sfence / clflush */
>              if ((modrm & 0xc7) == 0xc0) {
>                  /* sfence */
> +                /* XXX: also check for cpuid_ext2_features & CPUID_EXT2_MMXEXT */
>                  if (!(s->cpuid_features & CPUID_SSE))
>                      goto illegal_op;
>              } else {
> @@ -6227,8 +6266,11 @@
>              goto illegal_op;
>          }
>          break;
> -    case 0x10d: /* prefetch */
> +    case 0x10d: /* 3DNow! prefetch(w) */
>          modrm = ldub_code(s->pc++);
> +        mod = (modrm >> 6) & 3;
> +        if (mod == 3)
> +            goto illegal_op;
>          gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
>          /* ignore for now */
>          break;
> @@ -6245,6 +6287,9 @@
>          gen_op_rsm();
>          gen_eob(s);
>          break;
> +    case 0x10e ... 0x10f:
> +        /* 3DNow! instructions, ignore prefixes */
> +        s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
>      case 0x110 ... 0x117:
>      case 0x128 ... 0x12f:
>      case 0x150 ... 0x177:
> 
> 
> 
> 
> 

      reply	other threads:[~2008-04-08 21:53 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-08 19:51 [Qemu-devel] [4180] 3DNow! instruction set emulation Aurelien Jarno
2008-04-08 21:52 ` Fabrice Bellard [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=47FBE934.2040501@bellard.org \
    --to=fabrice@bellard.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.