qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/4] target/i386 tcg updates
@ 2017-10-26 10:50 Richard Henderson
  2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 1/4] target/i386: Decode AMD XOP prefix Richard Henderson
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Richard Henderson @ 2017-10-26 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, ehabkost

Resurrecting some patches that were on the list in July.
There are two bug fixes in here, so we should at least
get those into 2.11.

Changes since v1:
  * Rebased, adjusting for x86_ldub_code.


r~


Ricardo Ribalda Delgado (1):
  target/i386: Fix ANDN (bmi)

Richard Henderson (3):
  target/i386: Decode AMD XOP prefix
  target/i386: Implement all TBM instructions
  target/i386: Fix BLSR and BLSI

 target/i386/cc_helper_template.h |  18 ++++
 target/i386/cpu.h                |   7 +-
 target/i386/cc_helper.c          |  28 +++++-
 target/i386/cpu.c                |   3 +-
 target/i386/translate.c          | 191 ++++++++++++++++++++++++++++++++-------
 5 files changed, 209 insertions(+), 38 deletions(-)

-- 
2.13.6

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH v2 1/4] target/i386: Decode AMD XOP prefix
  2017-10-26 10:50 [Qemu-devel] [PATCH v2 0/4] target/i386 tcg updates Richard Henderson
@ 2017-10-26 10:50 ` Richard Henderson
  2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 2/4] target/i386: Implement all TBM instructions Richard Henderson
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2017-10-26 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, ehabkost, Richard Henderson

From: Richard Henderson <rth@twiddle.net>

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/i386/translate.c | 46 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 14 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 7df9233ded..db88cc4764 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4536,8 +4536,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #endif
     case 0xc5: /* 2-byte VEX */
     case 0xc4: /* 3-byte VEX */
+    case 0x8f: /* 3-byte XOP */
         /* VEX prefixes cannot be used except in 32-bit mode.
-           Otherwise the instruction is LES or LDS.  */
+           Otherwise the instruction is LES, LDS, or POP.  */
         if (s->code32 && !s->vm86) {
             static const int pp_prefix[4] = {
                 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
@@ -4546,7 +4547,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 
             if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
                 /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
-                   otherwise the instruction is LES or LDS.  */
+                   otherwise the instruction is LES, LDS, or POP.  */
+                break;
+            }
+            if (b == 0x8f && (vex2 & 0x1f) < 8) {
+                /* If the value of the XOP.map_select field is less than 8,
+                   the first two bytes of the three-byte XOP are interpreted
+                   as a form of the POP instruction.  */
                 break;
             }
             s->pc++;
@@ -4572,18 +4579,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
 #endif
                 vex3 = x86_ldub_code(env, s);
                 rex_w = (vex3 >> 7) & 1;
-                switch (vex2 & 0x1f) {
-                case 0x01: /* Implied 0f leading opcode bytes.  */
-                    b = x86_ldub_code(env, s) | 0x100;
-                    break;
-                case 0x02: /* Implied 0f 38 leading opcode bytes.  */
-                    b = 0x138;
-                    break;
-                case 0x03: /* Implied 0f 3a leading opcode bytes.  */
-                    b = 0x13a;
-                    break;
-                default:   /* Reserved for future use.  */
-                    goto unknown_op;
+                if (b == 0xc4) {
+                    switch (vex2 & 0x1f) {
+                    case 0x01: /* Implied 0f leading opcode bytes.  */
+                        b = x86_ldub_code(env, s) | 0x100;
+                        break;
+                    case 0x02: /* Implied 0f 38 leading opcode bytes.  */
+                        b = 0x138;
+                        break;
+                    case 0x03: /* Implied 0f 3a leading opcode bytes.  */
+                        b = 0x13a;
+                        break;
+                    default:   /* Reserved for future use.  */
+                        goto unknown_op;
+                    }
+                } else {
+                    /* Unlike VEX, XOP.map_select does not overlap the
+                       base instruction set.  Prepend the map_select to
+                       the next opcode byte.  */
+                    b = x86_ldub_code(env, s) + (vex2 & 0x1f) * 0x100;
                 }
             }
             s->vex_v = (~vex3 >> 3) & 0xf;
@@ -8307,6 +8321,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
     case 0x1d0 ... 0x1fe:
         gen_sse(env, s, b, pc_start, rex_r);
         break;
+
+    case 0x800 ... 0x8ff: /* XOP opcode map 8 */
+    case 0x900 ... 0x9ff: /* XOP opcode map 9 */
+    case 0xa00 ... 0xaff: /* XOP opcode map 10 */
     default:
         goto unknown_op;
     }
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH v2 2/4] target/i386: Implement all TBM instructions
  2017-10-26 10:50 [Qemu-devel] [PATCH v2 0/4] target/i386 tcg updates Richard Henderson
  2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 1/4] target/i386: Decode AMD XOP prefix Richard Henderson
@ 2017-10-26 10:50 ` Richard Henderson
  2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 3/4] target/i386: Fix BLSR and BLSI Richard Henderson
  2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 4/4] target/i386: Fix ANDN (bmi) Richard Henderson
  3 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2017-10-26 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, ehabkost, Richard Henderson

From: Richard Henderson <rth@twiddle.net>

Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/i386/cc_helper_template.h |  18 ++++++
 target/i386/cpu.h                |   7 ++-
 target/i386/cc_helper.c          |  28 +++++++--
 target/i386/cpu.c                |   3 +-
 target/i386/translate.c          | 123 ++++++++++++++++++++++++++++++++++++++-
 5 files changed, 170 insertions(+), 9 deletions(-)

diff --git a/target/i386/cc_helper_template.h b/target/i386/cc_helper_template.h
index 607311f195..6ce63b7ca9 100644
--- a/target/i386/cc_helper_template.h
+++ b/target/i386/cc_helper_template.h
@@ -235,6 +235,24 @@ static int glue(compute_c_bmilg, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
     return src1 == 0;
 }
 
+static int glue(compute_all_tbmadd, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+    int cf, pf, af, zf, sf, of;
+
+    cf = (src1 == (DATA_TYPE)-1);
+    pf = 0; /* undefined */
+    af = 0; /* undefined */
+    zf = (dst == 0) * CC_Z;
+    sf = lshift(dst, 8 - DATA_BITS) & CC_S;
+    of = 0;
+    return cf | pf | af | zf | sf | of;
+}
+
+static int glue(compute_c_tbmadd, SUFFIX)(DATA_TYPE dst, DATA_TYPE src1)
+{
+    return src1 == (DATA_TYPE)-1;
+}
+
 #undef DATA_BITS
 #undef SIGN_MASK
 #undef DATA_TYPE
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index b086b1528b..6c520a90fb 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -774,11 +774,16 @@ typedef enum {
     CC_OP_SARL,
     CC_OP_SARQ,
 
-    CC_OP_BMILGB, /* Z,S via CC_DST, C = SRC==0; O=0; P,A undefined */
+    CC_OP_BMILGB, /* Z,S via DST, C = SRC==0; O=0; P,A undefined */
     CC_OP_BMILGW,
     CC_OP_BMILGL,
     CC_OP_BMILGQ,
 
+    CC_OP_TBMADDB, /* Z,S via DST; C = SRC==-1; O=0; P,A undefined */
+    CC_OP_TBMADDW,
+    CC_OP_TBMADDL,
+    CC_OP_TBMADDQ,
+
     CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest.  */
     CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest.  */
     CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest.  */
diff --git a/target/i386/cc_helper.c b/target/i386/cc_helper.c
index c9c90e10db..2f12c3b6cb 100644
--- a/target/i386/cc_helper.c
+++ b/target/i386/cc_helper.c
@@ -98,9 +98,6 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
                                    target_ulong src2, int op)
 {
     switch (op) {
-    default: /* should never happen */
-        return 0;
-
     case CC_OP_EFLAGS:
         return src1;
     case CC_OP_CLR:
@@ -185,6 +182,13 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
     case CC_OP_BMILGL:
         return compute_all_bmilgl(dst, src1);
 
+    case CC_OP_TBMADDB:
+        return compute_all_tbmaddb(dst, src1);
+    case CC_OP_TBMADDW:
+        return compute_all_tbmaddw(dst, src1);
+    case CC_OP_TBMADDL:
+        return compute_all_tbmaddl(dst, src1);
+
     case CC_OP_ADCX:
         return compute_all_adcx(dst, src1, src2);
     case CC_OP_ADOX:
@@ -215,7 +219,12 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1,
         return compute_all_sarq(dst, src1);
     case CC_OP_BMILGQ:
         return compute_all_bmilgq(dst, src1);
+    case CC_OP_TBMADDQ:
+        return compute_all_tbmaddq(dst, src1);
 #endif
+
+    default:
+        g_assert_not_reached();
     }
 }
 
@@ -228,7 +237,6 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
                                  target_ulong src2, int op)
 {
     switch (op) {
-    default: /* should never happen */
     case CC_OP_LOGICB:
     case CC_OP_LOGICW:
     case CC_OP_LOGICL:
@@ -307,6 +315,13 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
     case CC_OP_BMILGL:
         return compute_c_bmilgl(dst, src1);
 
+    case CC_OP_TBMADDB:
+        return compute_c_tbmaddb(dst, src1);
+    case CC_OP_TBMADDW:
+        return compute_c_tbmaddw(dst, src1);
+    case CC_OP_TBMADDL:
+        return compute_c_tbmaddl(dst, src1);
+
 #ifdef TARGET_X86_64
     case CC_OP_ADDQ:
         return compute_c_addq(dst, src1);
@@ -320,7 +335,12 @@ target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1,
         return compute_c_shlq(dst, src1);
     case CC_OP_BMILGQ:
         return compute_c_bmilgq(dst, src1);
+    case CC_OP_TBMADDQ:
+        return compute_c_tbmaddq(dst, src1);
 #endif
+
+    default:
+        g_assert_not_reached();
     }
 }
 
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 53ec94ac9b..f36844fd95 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -227,7 +227,8 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
           CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_PDPE1GB | \
           TCG_EXT2_X86_64_FEATURES)
 #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
-          CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
+          CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A | \
+          CPUID_EXT3_TBM)
 #define TCG_EXT4_FEATURES 0
 #define TCG_SVM_FEATURES 0
 #define TCG_KVM_FEATURES 0
diff --git a/target/i386/translate.c b/target/i386/translate.c
index db88cc4764..409b195d37 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -217,6 +217,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_SHLB ... CC_OP_SHLQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_SARB ... CC_OP_SARQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_BMILGB ... CC_OP_BMILGQ] = USES_CC_DST | USES_CC_SRC,
+    [CC_OP_TBMADDB ... CC_OP_TBMADDQ] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
     [CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
     [CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
@@ -781,6 +782,12 @@ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg)
         t0 = gen_ext_tl(reg, cpu_cc_src, size, false);
         return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 };
 
+    case CC_OP_TBMADDB ... CC_OP_TBMADDQ:
+        size = s->cc_op - CC_OP_TBMADDB;
+        t0 = gen_ext_tl(reg, cpu_cc_src, size, true);
+        return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0,
+                             .mask = -1, .imm = -1 };
+
     case CC_OP_ADCX:
     case CC_OP_ADCOX:
         return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst,
@@ -8322,9 +8329,119 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu)
         gen_sse(env, s, b, pc_start, rex_r);
         break;
 
-    case 0x800 ... 0x8ff: /* XOP opcode map 8 */
-    case 0x900 ... 0x9ff: /* XOP opcode map 9 */
-    case 0xa00 ... 0xaff: /* XOP opcode map 10 */
+    case 0x901:
+    case 0x902: /* most tbm insns */
+        if (!(s->cpuid_ext3_features & CPUID_EXT3_TBM)
+            || s->vex_l != 0) {
+            goto illegal_op;
+        }
+        modrm = x86_ldub_code(env, s);
+        mod = (modrm >> 6) & 3;
+        rm = (modrm & 7) | REX_B(s);
+        ot = mo_64_32(s->dflag);
+        if (mod != 3) {
+            gen_lea_modrm(env, s, modrm);
+            gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+        } else {
+            gen_op_mov_v_reg(ot, cpu_T0, rm);
+        }
+
+        tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
+        switch ((b & 2) * 4 + ((modrm >> 3) & 7)) {
+        case 1: /* blcfill */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
+            break;
+        case 2: /* blsfill */
+            op = CC_OP_BMILGB;
+            tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+            break;
+        case 3: /* blcs */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_or_tl(cpu_T0, cpu_T0, cpu_T1);
+            break;
+        case 4: /* tzmsk */
+            op = CC_OP_BMILGB;
+            tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_andc_tl(cpu_T0, cpu_T1, cpu_T0);
+            break;
+        case 5: /* blcic */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_andc_tl(cpu_T0, cpu_T1, cpu_T0);
+            break;
+        case 6: /* blsic */
+            op = CC_OP_BMILGB;
+            tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_orc_tl(cpu_T0, cpu_T1, cpu_T0);
+            break;
+        case 7: /* t1mskc */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_orc_tl(cpu_T0, cpu_T1, cpu_T0);
+            break;
+        case 8 + 1: /* blcmsk */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
+            break;
+        case 8 + 6: /* blci */
+            op = CC_OP_TBMADDB;
+            tcg_gen_addi_tl(cpu_T1, cpu_T0, 1);
+            tcg_gen_orc_tl(cpu_T0, cpu_T0, cpu_T1);
+            break;
+        default:
+            goto unknown_op;
+        }
+        gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
+        tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+        set_cc_op(s, op + ot);
+        break;
+
+    case 0xa10: /* bextr Gy, Ey, imm4 */
+        {
+            int ofs, len, max;
+
+            if (!(s->cpuid_ext3_features & CPUID_EXT3_TBM)
+                || s->vex_l != 0) {
+                goto illegal_op;
+            }
+
+            s->rip_offset = 4;
+            modrm = cpu_ldub_code(env, s->pc++);
+            reg = ((modrm >> 3) & 7) | rex_r;
+            mod = (modrm >> 6) & 3;
+            rm = (modrm & 7) | REX_B(s);
+            ot = mo_64_32(s->dflag);
+            if (mod != 3) {
+                gen_lea_modrm(env, s, modrm);
+                gen_op_ld_v(s, ot, cpu_T0, cpu_A0);
+            } else {
+                gen_op_mov_v_reg(ot, cpu_T0, rm);
+            }
+            val = cpu_ldl_code(env, s->pc);
+            s->pc += 4;
+
+            ofs = extract32(val, 0, 8);
+            len = extract32(val, 8, 8);
+            max = 8 << ot;
+            if (len == 0 || ofs >= max) {
+                tcg_gen_movi_tl(cpu_T0, 0);
+            } else {
+                len = MIN(len, max - ofs);
+                tcg_gen_extract_tl(cpu_T0, cpu_T0, ofs, len);
+            }
+            tcg_gen_mov_tl(cpu_regs[reg], cpu_T0);
+            gen_op_update1_cc();
+            /* Z is set as per result, C/O = 0, S/A/P = undefined.
+               Which is less strict than LOGIC, but accurate.  */
+            set_cc_op(s, CC_OP_LOGICB + ot);
+        }
+        break;
+
     default:
         goto unknown_op;
     }
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH v2 3/4] target/i386: Fix BLSR and BLSI
  2017-10-26 10:50 [Qemu-devel] [PATCH v2 0/4] target/i386 tcg updates Richard Henderson
  2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 1/4] target/i386: Decode AMD XOP prefix Richard Henderson
  2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 2/4] target/i386: Implement all TBM instructions Richard Henderson
@ 2017-10-26 10:50 ` Richard Henderson
  2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 4/4] target/i386: Fix ANDN (bmi) Richard Henderson
  3 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2017-10-26 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, ehabkost, Richard Henderson

From: Richard Henderson <rth@twiddle.net>

The implementation of these two instructions was swapped.
At the same time, unify the setup of eflags for the insn group.

Reported-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/i386/translate.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 409b195d37..dd464b98b0 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -4067,34 +4067,26 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
 
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
-                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
+                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
                     tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
-                    gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
-                    gen_op_update2_cc();
-                    set_cc_op(s, CC_OP_BMILGB + ot);
                     break;
-
                 case 2: /* blsmsk By,Ey */
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
-                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_cc_src);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    tcg_gen_subi_tl(cpu_T1, cpu_T0, 1);
+                    tcg_gen_xor_tl(cpu_T0, cpu_T0, cpu_T1);
                     break;
-
                 case 3: /* blsi By, Ey */
-                    tcg_gen_mov_tl(cpu_cc_src, cpu_T0);
-                    tcg_gen_subi_tl(cpu_T0, cpu_T0, 1);
-                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_cc_src);
-                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
-                    set_cc_op(s, CC_OP_BMILGB + ot);
+                    tcg_gen_neg_tl(cpu_T1, cpu_T0);
+                    tcg_gen_and_tl(cpu_T0, cpu_T0, cpu_T1);
                     break;
-
                 default:
                     goto unknown_op;
                 }
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T0);
+                gen_op_mov_reg_v(ot, s->vex_v, cpu_T0);
+                set_cc_op(s, CC_OP_BMILGB + ot);
                 break;
 
             default:
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH v2 4/4] target/i386: Fix ANDN (bmi)
  2017-10-26 10:50 [Qemu-devel] [PATCH v2 0/4] target/i386 tcg updates Richard Henderson
                   ` (2 preceding siblings ...)
  2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 3/4] target/i386: Fix BLSR and BLSI Richard Henderson
@ 2017-10-26 10:50 ` Richard Henderson
  3 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2017-10-26 10:50 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, ehabkost, Ricardo Ribalda Delgado, Richard Henderson

From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>

Operands on ANDN are swapped.

Tested with the following function:

long test_andn(long v1, long v2){
	return (~v1 & v2);
}

Compiled with:

gcc kk.c -mbmi -O3 -Wall

0000000000000910 <test_andn>:
 910:c4 e2 c0 f2 c6       	andn   %rsi,%rdi,%rax
 915:c3                   	retq
 916:66 2e 0f 1f 84 00 00 	nopw   %cs:0x0(%rax,%rax,1)
 91d:00 00 00

and

gcc kk.c -march=native -O3 -Wall

0000000000000930 <test_andn>:
 930:   48 f7 d7                not    %rdi
 933:   48 89 f8                mov    %rdi,%rax
 936:   48 21 f0                and    %rsi,%rax
 939:   c3                      retq
 93a:   66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

The test showed that the -mbmi version behaved differently from the
-march=native version.

Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Message-Id: <20170713215137.5307-1-ricardo.ribalda@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target/i386/translate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index dd464b98b0..96cd04c6de 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -3810,7 +3810,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 ot = mo_64_32(s->dflag);
                 gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
-                tcg_gen_andc_tl(cpu_T0, cpu_regs[s->vex_v], cpu_T0);
+                tcg_gen_andc_tl(cpu_T0, cpu_T0, cpu_regs[s->vex_v]);
                 gen_op_mov_reg_v(ot, reg, cpu_T0);
                 gen_op_update1_cc();
                 set_cc_op(s, CC_OP_LOGICB + ot);
-- 
2.13.6

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-10-26 10:50 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-26 10:50 [Qemu-devel] [PATCH v2 0/4] target/i386 tcg updates Richard Henderson
2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 1/4] target/i386: Decode AMD XOP prefix Richard Henderson
2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 2/4] target/i386: Implement all TBM instructions Richard Henderson
2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 3/4] target/i386: Fix BLSR and BLSI Richard Henderson
2017-10-26 10:50 ` [Qemu-devel] [PATCH v2 4/4] target/i386: Fix ANDN (bmi) Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).