qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: Richard Henderson <richard.henderson@linaro.org>
Subject: [PULL 47/53] target/i386: reimplement 0x0f 0x28-0x2f, add AVX
Date: Tue, 18 Oct 2022 15:30:36 +0200	[thread overview]
Message-ID: <20221018133042.856368-48-pbonzini@redhat.com> (raw)
In-Reply-To: <20221018133042.856368-1-pbonzini@redhat.com>

Here the code is a bit uglier due to the truncation and extension
of registers to and from 32-bit.  There is also a mistake in the
manual with respect to the size of the memory operand of CVTPS2PI
and CVTTPS2PI, reported by Ricky Zhou.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/tcg/decode-new.c.inc |  56 ++++++++++++++
 target/i386/tcg/emit.c.inc       | 128 +++++++++++++++++++++++++++++++
 target/i386/tcg/translate.c      |   1 +
 3 files changed, 185 insertions(+)

diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 5435447e07..a5d5428260 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -672,6 +672,53 @@ static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui
     }
 }
 
+static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry opcodes_0F2A[4] = {
+        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
+        X86_OP_ENTRY3(CVTPI2Px,  V,x,  None,None, Q,q),
+        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
+        X86_OP_ENTRY3(VCVTSI2Sx, V,x,  H,x, E,y,        vex3),
+    };
+    *entry = *decode_by_prefix(s, opcodes_0F2A);
+}
+
+static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry opcodes_0F2B[4] = {
+        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex4), /* MOVNTPS */
+        X86_OP_ENTRY3(MOVDQ,      M,x,  None,None, V,x, vex4), /* MOVNTPD */
+        X86_OP_ENTRY3(VMOVSS_st,  M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
+        X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
+    };
+
+    *entry = *decode_by_prefix(s, opcodes_0F2B);
+}
+
+static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry opcodes_0F2C[4] = {
+        /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit.  */
+        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,q),
+        X86_OP_ENTRY3(CVTTPx2PI,  P,q,  None,None, W,dq),
+        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,ss, vex3),
+        X86_OP_ENTRY3(VCVTTSx2SI, G,y,  None,None, W,sd, vex3),
+    };
+    *entry = *decode_by_prefix(s, opcodes_0F2C);
+}
+
+static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry opcodes_0F2D[4] = {
+        /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit.  */
+        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,q),
+        X86_OP_ENTRY3(CVTPx2PI,  P,q,  None,None, W,dq),
+        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,ss, vex3),
+        X86_OP_ENTRY3(VCVTSx2SI, G,y,  None,None, W,sd, vex3),
+    };
+    *entry = *decode_by_prefix(s, opcodes_0F2D);
+}
+
 static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
 {
     if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
@@ -746,6 +793,15 @@ static const X86OpEntry opcodes_0F[256] = {
     [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
     [0x77] = X86_OP_GROUP0(0F77),
 
+    [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
+    [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
+    [0x2A] = X86_OP_GROUP0(0F2A),
+    [0x2B] = X86_OP_GROUP0(0F2B),
+    [0x2C] = X86_OP_GROUP0(0F2C),
+    [0x2D] = X86_OP_GROUP0(0F2D),
+    [0x2E] = X86_OP_ENTRY3(VUCOMI,     None,None, V,x, W,x,  vex4 p_00_66),
+    [0x2F] = X86_OP_ENTRY3(VCOMI,      None,None, V,x, W,x,  vex4 p_00_66),
+
     [0x38] = X86_OP_GROUP0(0F38),
     [0x3a] = X86_OP_GROUP0(0F3A),
 
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index d87f6016d9..266e7499ad 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1038,6 +1038,36 @@ static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot));
 }
 
+static void gen_CVTPI2Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_helper_enter_mmx(cpu_env);
+    if (s->prefix & PREFIX_DATA) {
+        gen_helper_cvtpi2pd(cpu_env, OP_PTR0, OP_PTR2);
+    } else {
+        gen_helper_cvtpi2ps(cpu_env, OP_PTR0, OP_PTR2);
+    }
+}
+
+static void gen_CVTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_helper_enter_mmx(cpu_env);
+    if (s->prefix & PREFIX_DATA) {
+        gen_helper_cvtpd2pi(cpu_env, OP_PTR0, OP_PTR2);
+    } else {
+        gen_helper_cvtps2pi(cpu_env, OP_PTR0, OP_PTR2);
+    }
+}
+
+static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_helper_enter_mmx(cpu_env);
+    if (s->prefix & PREFIX_DATA) {
+        gen_helper_cvttpd2pi(cpu_env, OP_PTR0, OP_PTR2);
+    } else {
+        gen_helper_cvttps2pi(cpu_env, OP_PTR0, OP_PTR2);
+    }
+}
+
 static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     gen_helper_emms(cpu_env);
@@ -1724,6 +1754,14 @@ static void gen_VCMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_helper_cmp_funcs[index][b](cpu_env, OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
+static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    SSEFunc_0_epp fn;
+    fn = s->prefix & PREFIX_DATA ? gen_helper_comisd : gen_helper_comiss;
+    fn(cpu_env, OP_PTR1, OP_PTR2);
+    set_cc_op(s, CC_OP_EFLAGS);
+}
+
 static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     gen_unary_fp_sse(s, env, decode,
@@ -1732,6 +1770,88 @@ static void gen_VCVTfp2fp(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
                      gen_helper_cvtsd2ss, gen_helper_cvtss2sd);
 }
 
+static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    int vec_len = vector_len(s, decode);
+    TCGv_i32 in;
+
+    tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len);
+
+#ifdef TARGET_X86_64
+    MemOp ot = decode->op[2].ot;
+    if (ot == MO_64) {
+        if (s->prefix & PREFIX_REPNZ) {
+            gen_helper_cvtsq2sd(cpu_env, OP_PTR0, s->T1);
+        } else {
+            gen_helper_cvtsq2ss(cpu_env, OP_PTR0, s->T1);
+        }
+        return;
+    }
+    in = s->tmp2_i32;
+    tcg_gen_trunc_tl_i32(in, s->T1);
+#else
+    in = s->T1;
+#endif
+
+    if (s->prefix & PREFIX_REPNZ) {
+        gen_helper_cvtsi2sd(cpu_env, OP_PTR0, in);
+    } else {
+        gen_helper_cvtsi2ss(cpu_env, OP_PTR0, in);
+    }
+}
+
+static inline void gen_VCVTtSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+                                  SSEFunc_i_ep ss2si, SSEFunc_l_ep ss2sq,
+                                  SSEFunc_i_ep sd2si, SSEFunc_l_ep sd2sq)
+{
+    TCGv_i32 out;
+
+#ifdef TARGET_X86_64
+    MemOp ot = decode->op[0].ot;
+    if (ot == MO_64) {
+        if (s->prefix & PREFIX_REPNZ) {
+            sd2sq(s->T0, cpu_env, OP_PTR2);
+        } else {
+            ss2sq(s->T0, cpu_env, OP_PTR2);
+        }
+        return;
+    }
+
+    out = s->tmp2_i32;
+#else
+    out = s->T0;
+#endif
+    if (s->prefix & PREFIX_REPNZ) {
+        sd2si(out, cpu_env, OP_PTR2);
+    } else {
+        ss2si(out, cpu_env, OP_PTR2);
+    }
+#ifdef TARGET_X86_64
+    tcg_gen_extu_i32_tl(s->T0, out);
+#endif
+}
+
+#ifndef TARGET_X86_64
+#define gen_helper_cvtss2sq NULL
+#define gen_helper_cvtsd2sq NULL
+#define gen_helper_cvttss2sq NULL
+#define gen_helper_cvttsd2sq NULL
+#endif
+
+static void gen_VCVTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_VCVTtSx2SI(s, env, decode,
+                   gen_helper_cvtss2si, gen_helper_cvtss2sq,
+                   gen_helper_cvtsd2si, gen_helper_cvtsd2sq);
+}
+
+static void gen_VCVTTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    gen_VCVTtSx2SI(s, env, decode,
+                   gen_helper_cvttss2si, gen_helper_cvttss2sq,
+                   gen_helper_cvttsd2si, gen_helper_cvttsd2sq);
+}
+
 static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     int mask = decode->immediate & 1;
@@ -1987,6 +2107,14 @@ static void gen_VSHUF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     fn(OP_PTR0, OP_PTR1, OP_PTR2, imm);
 }
 
+static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+    SSEFunc_0_epp fn;
+    fn = s->prefix & PREFIX_DATA ? gen_helper_ucomisd : gen_helper_ucomiss;
+    fn(cpu_env, OP_PTR1, OP_PTR2);
+    set_cc_op(s, CC_OP_EFLAGS);
+}
+
 static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     TCGv_ptr ptr = tcg_temp_new_ptr();
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 90bdd0994e..cf895e4132 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -4784,6 +4784,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
         if (use_new &&
             (b == 0x138 || b == 0x13a ||
              (b >= 0x110 && b <= 0x117) ||
+             (b >= 0x128 && b <= 0x12f) ||
              (b >= 0x150 && b <= 0x17f) ||
              b == 0x1c2 || (b >= 0x1c4 && b <= 0x1c6) ||
              (b >= 0x1d0 && b <= 0x1ff))) {
-- 
2.37.3



  parent reply	other threads:[~2022-10-18 14:28 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-18 13:29 [PULL 00/53] target/i386, scsi, build patches for 2022-10-18 Paolo Bonzini
2022-10-18 13:29 ` [PULL 01/53] virtio-scsi: Send "REPORTED LUNS CHANGED" sense data upon disk hotplug events Paolo Bonzini
2022-10-18 13:29 ` [PULL 02/53] configure: don't enable firmware for targets that are not built Paolo Bonzini
2022-10-18 13:29 ` [PULL 03/53] scsi: Use device_cold_reset() and bus_cold_reset() Paolo Bonzini
2022-10-18 13:29 ` [PULL 04/53] hw/scsi/vmw_pvscsi.c: Use device_cold_reset() to reset SCSI devices Paolo Bonzini
2022-10-18 13:29 ` [PULL 05/53] hyperv: fix SynIC SINT assertion failure on guest reset Paolo Bonzini
2022-10-18 13:29 ` [PULL 06/53] configure: Avoid using strings binary Paolo Bonzini
2022-10-18 13:29 ` [PULL 07/53] target/i386: Use device_cold_reset() to reset the APIC Paolo Bonzini
2022-10-18 13:29 ` [PULL 08/53] target/i386: Save and restore pc_save before tcg_remove_ops_after Paolo Bonzini
2022-10-18 13:29 ` [PULL 09/53] target/i386: Use MMUAccessType across excp_helper.c Paolo Bonzini
2022-10-18 13:29 ` [PULL 10/53] target/i386: Direct call get_hphys from mmu_translate Paolo Bonzini
2022-10-18 13:30 ` [PULL 11/53] target/i386: Introduce structures for mmu_translate Paolo Bonzini
2022-10-18 13:30 ` [PULL 12/53] target/i386: Reorg GET_HPHYS Paolo Bonzini
2022-10-18 13:30 ` [PULL 13/53] target/i386: Add MMU_PHYS_IDX and MMU_NESTED_IDX Paolo Bonzini
2022-10-18 13:30 ` [PULL 14/53] target/i386: Use MMU_NESTED_IDX for vmload/vmsave Paolo Bonzini
2022-10-18 13:30 ` [PULL 15/53] target/i386: Combine 5 sets of variables in mmu_translate Paolo Bonzini
2022-10-18 13:30 ` [PULL 16/53] target/i386: Use atomic operations for pte updates Paolo Bonzini
2022-10-18 13:30 ` [PULL 17/53] target/i386: Use probe_access_full for final stage2 translation Paolo Bonzini
2022-10-18 13:30 ` [PULL 18/53] target/i386: Define XMMReg and access macros, align ZMM registers Paolo Bonzini
2022-10-18 13:30 ` [PULL 19/53] target/i386: make ldo/sto operations consistent with ldq Paolo Bonzini
2022-10-18 13:30 ` [PULL 20/53] target/i386: make rex_w available even in 32-bit mode Paolo Bonzini
2022-10-18 13:30 ` [PULL 21/53] target/i386: add core of new i386 decoder Paolo Bonzini
2022-10-18 13:30 ` [PULL 22/53] target/i386: add ALU load/writeback core Paolo Bonzini
2022-10-18 13:30 ` [PULL 23/53] target/i386: add CPUID[EAX=7,ECX=0].ECX to DisasContext Paolo Bonzini
2022-10-18 13:30 ` [PULL 24/53] target/i386: add CPUID feature checks to new decoder Paolo Bonzini
2022-10-18 13:30 ` [PULL 25/53] target/i386: add AVX_EN hflag Paolo Bonzini
2022-10-18 13:30 ` [PULL 26/53] target/i386: validate VEX prefixes via the instructions' exception classes Paolo Bonzini
2022-10-18 13:30 ` [PULL 27/53] target/i386: validate SSE prefixes directly in the decoding table Paolo Bonzini
2022-10-18 13:30 ` [PULL 28/53] target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder Paolo Bonzini
2022-10-18 13:30 ` [PULL 29/53] target/i386: Prepare ops_sse_header.h for 256 bit AVX Paolo Bonzini
2022-10-18 13:30 ` [PULL 30/53] target/i386: extend helpers to support VEX.V 3- and 4- operand encodings Paolo Bonzini
2022-10-18 13:30 ` [PULL 31/53] target/i386: support operand merging in binary scalar helpers Paolo Bonzini
2022-10-18 13:30 ` [PULL 32/53] target/i386: provide 3-operand versions of unary " Paolo Bonzini
2022-10-18 13:30 ` [PULL 33/53] target/i386: implement additional AVX comparison operators Paolo Bonzini
2022-10-18 13:30 ` [PULL 34/53] target/i386: Introduce 256-bit vector helpers Paolo Bonzini
2022-10-18 13:30 ` [PULL 35/53] target/i386: reimplement 0x0f 0x60-0x6f, add AVX Paolo Bonzini
2022-10-18 13:30 ` [PULL 36/53] target/i386: reimplement 0x0f 0xd8-0xdf, 0xe8-0xef, 0xf8-0xff, " Paolo Bonzini
2022-10-18 13:30 ` [PULL 37/53] target/i386: reimplement 0x0f 0x50-0x5f, " Paolo Bonzini
2022-10-18 13:30 ` [PULL 38/53] target/i386: reimplement 0x0f 0x78-0x7f, " Paolo Bonzini
2022-10-18 13:30 ` [PULL 39/53] target/i386: reimplement 0x0f 0x70-0x77, " Paolo Bonzini
2022-10-18 13:30 ` [PULL 40/53] target/i386: reimplement 0x0f 0xd0-0xd7, 0xe0-0xe7, 0xf0-0xf7, " Paolo Bonzini
2022-10-18 13:30 ` [PULL 41/53] target/i386: clarify (un)signedness of immediates from 0F3Ah opcodes Paolo Bonzini
2022-10-18 13:30 ` [PULL 42/53] target/i386: reimplement 0x0f 0x3a, add AVX Paolo Bonzini
2022-10-18 13:30 ` [PULL 43/53] target/i386: Use tcg gvec ops for pmovmskb Paolo Bonzini
2022-10-18 13:30 ` [PULL 44/53] target/i386: reimplement 0x0f 0x38, add AVX Paolo Bonzini
2022-10-18 13:30 ` [PULL 45/53] target/i386: reimplement 0x0f 0xc2, 0xc4-0xc6, " Paolo Bonzini
2022-10-18 13:30 ` [PULL 46/53] target/i386: reimplement 0x0f 0x10-0x17, " Paolo Bonzini
2022-10-18 13:30 ` Paolo Bonzini [this message]
2023-05-09 10:52   ` [PULL 47/53] target/i386: reimplement 0x0f 0x28-0x2f, " Peter Maydell
2022-10-18 13:30 ` [PULL 48/53] target/i386: implement XSAVE and XRSTOR of AVX registers Paolo Bonzini
2022-10-18 13:30 ` [PULL 49/53] target/i386: implement VLDMXCSR/VSTMXCSR Paolo Bonzini
2022-10-18 13:30 ` [PULL 50/53] target/i386: Enable AVX cpuid bits when using TCG Paolo Bonzini
2022-10-18 13:30 ` [PULL 51/53] tests/tcg: extend SSE tests to AVX Paolo Bonzini
2022-10-18 13:30 ` [PULL 52/53] target/i386: move 3DNow to the new decoder Paolo Bonzini
2022-10-18 13:30 ` [PULL 53/53] target/i386: remove old SSE decoder Paolo Bonzini
2022-10-18 20:01 ` [PULL 00/53] target/i386, scsi, build patches for 2022-10-18 Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221018133042.856368-48-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).