From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
xen-devel <xen-devel@lists.xenproject.org>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>
Subject: Re: [PATCH v3 07/25] x86emul: support AVX2 gather insns
Date: Thu, 1 Feb 2018 20:53:47 +0000 [thread overview]
Message-ID: <fe6aaaff-45f2-305c-e589-a24b694fc278@citrix.com> (raw)
In-Reply-To: <5A29584702000078001958A3@prv-mh.provo.novell.com>
On 07/12/17 14:03, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -391,6 +391,7 @@ static const struct {
> [0x78 ... 0x79] = { .simd_size = simd_other, .two_op = 1 },
> [0x8c] = { .simd_size = simd_other },
> [0x8e] = { .simd_size = simd_other, .to_mem = 1 },
> + [0x90 ... 0x93] = { .simd_size = simd_other, .vsib = 1 },
> [0x96 ... 0x9f] = { .simd_size = simd_packed_fp },
> [0xa6 ... 0xaf] = { .simd_size = simd_packed_fp },
> [0xb6 ... 0xbf] = { .simd_size = simd_packed_fp },
> @@ -598,6 +599,7 @@ struct x86_emulate_state {
> ext_8f0a,
> } ext;
> uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
> + uint8_t sib_index, sib_scale;
> uint8_t rex_prefix;
> bool lock_prefix;
> bool not_64bit; /* Instruction not available in 64bit. */
> @@ -2411,7 +2413,7 @@ x86_decode(
> struct x86_emulate_ctxt *ctxt,
> const struct x86_emulate_ops *ops)
> {
> - uint8_t b, d, sib, sib_index, sib_base;
> + uint8_t b, d;
> unsigned int def_op_bytes, def_ad_bytes, opcode;
> enum x86_segment override_seg = x86_seg_none;
> bool pc_rel = false;
> @@ -2745,6 +2747,7 @@ x86_decode(
>
> if ( modrm_mod == 3 )
> {
> + generate_exception_if(d & vSIB, EXC_UD);
> modrm_rm |= (rex_prefix & 1) << 3;
> ea.type = OP_REG;
> }
> @@ -2805,13 +2808,17 @@ x86_decode(
> ea.type = OP_MEM;
> if ( modrm_rm == 4 )
> {
> - sib = insn_fetch_type(uint8_t);
> - sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
> - sib_base = (sib & 7) | ((rex_prefix << 3) & 8);
> - if ( sib_index != 4 && !(d & vSIB) )
> - ea.mem.off = *decode_register(sib_index, state->regs,
> - false);
> - ea.mem.off <<= (sib >> 6) & 3;
> + uint8_t sib = insn_fetch_type(uint8_t);
> + uint8_t sib_base = (sib & 7) | ((rex_prefix << 3) & 8);
> +
> + state->sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
> + state->sib_scale = (sib >> 6) & 3;
> + if ( state->sib_index != 4 && !(d & vSIB) )
> + {
> + ea.mem.off = *decode_register(state->sib_index,
> + state->regs, false);
> + ea.mem.off <<= state->sib_scale;
This is a functional change: the shift of ea.mem.off used to be unconditional, but is now applied only inside the conditional.
> + }
> if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
> ea.mem.off += insn_fetch_type(int32_t);
> else if ( sib_base == 4 )
> @@ -7472,6 +7479,110 @@ x86_emulate(
> break;
> }
>
> + case X86EMUL_OPC_VEX_66(0x0f38, 0x90): /* vpgatherd{d,q} {x,y}mm,mem,{x,y}mm */
> + case X86EMUL_OPC_VEX_66(0x0f38, 0x91): /* vpgatherq{d,q} {x,y}mm,mem,{x,y}mm */
> + case X86EMUL_OPC_VEX_66(0x0f38, 0x92): /* vgatherdp{s,d} {x,y}mm,mem,{x,y}mm */
> + case X86EMUL_OPC_VEX_66(0x0f38, 0x93): /* vgatherqp{s,d} {x,y}mm,mem,{x,y}mm */
> + {
> + unsigned int mask_reg = ~vex.reg & (mode_64bit() ? 0xf : 7);
> + typeof(vex) *pvex;
> + union {
> + int32_t dw[8];
> + int64_t qw[4];
> + } index, mask;
> +
> + ASSERT(ea.type == OP_MEM);
> + generate_exception_if(modrm_reg == state->sib_index ||
> + modrm_reg == mask_reg ||
> + state->sib_index == mask_reg, EXC_UD);
> + generate_exception_if(!cpu_has_avx, EXC_UD);
> + vcpu_must_have(avx2);
> + get_fpu(X86EMUL_FPU_ymm, &fic);
> +
> + /* Read destination, index, and mask registers. */
> + opc = init_prefixes(stub);
> + pvex = copy_VEX(opc, vex);
> + pvex->opcx = vex_0f;
> + opc[0] = 0x7f; /* vmovdqa */
> + /* Use (%rax) as destination and modrm_reg as source. */
> + pvex->r = !mode_64bit() || !(modrm_reg & 8);
> + pvex->b = 1;
> + opc[1] = (modrm_reg & 7) << 3;
> + pvex->reg = 0xf;
> + opc[2] = 0xc3;
> +
> + invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
> +
> + pvex->pfx = vex_f3; /* vmovdqu */
> + /* Switch to sib_index as source. */
> + pvex->r = !mode_64bit() || !(state->sib_index & 8);
> + opc[1] = (state->sib_index & 7) << 3;
> +
> + invoke_stub("", "", "=m" (index) : "a" (&index));
> +
> + /* Switch to mask_reg as source. */
> + pvex->r = !mode_64bit() || !(mask_reg & 8);
> + opc[1] = (mask_reg & 7) << 3;
> +
> + invoke_stub("", "", "=m" (mask) : "a" (&mask));
> + put_stub(stub);
> +
> + /* Clear untouched parts of the destination and mask values. */
> + n = 1 << (2 + vex.l - ((b & 1) | vex.w));
> + op_bytes = 4 << vex.w;
> + memset((void *)mmvalp + n * op_bytes, 0, 32 - n * op_bytes);
> + memset((void *)&mask + n * op_bytes, 0, 32 - n * op_bytes);
> +
> + for ( i = 0; i < n && rc == X86EMUL_OKAY; ++i )
> + {
> + if ( (vex.w ? mask.qw[i] : mask.dw[i]) < 0 )
> + {
> + signed long idx = b & 1 ? index.qw[i] : index.dw[i];
> +
> + rc = ops->read(ea.mem.seg,
> + ea.mem.off + (idx << state->sib_scale),
> + (void *)mmvalp + i * op_bytes, op_bytes, ctxt);
> + if ( rc != X86EMUL_OKAY )
> + break;
> +
> +#ifdef __XEN__
> + if ( i + 1 < n && local_events_need_delivery() )
> + rc = X86EMUL_RETRY;
> +#endif
> + }
> +
> + if ( vex.w )
> + mask.qw[i] = 0;
> + else
> + mask.dw[i] = 0;
> + }
The incomplete case here is rather more complicated. If
rc != X86EMUL_OKAY and local events are pending, RF needs to be set,
although it is not clear whether this applies only when an exception is
pending, or between every element.
> +
> + /* Write destination and mask registers. */
> + opc = init_prefixes(stub);
> + pvex = copy_VEX(opc, vex);
> + pvex->opcx = vex_0f;
> + opc[0] = 0x6f; /* vmovdqa */
> + /* Use modrm_reg as destination and (%rax) as source. */
> + pvex->r = !mode_64bit() || !(modrm_reg & 8);
> + pvex->b = 1;
> + opc[1] = (modrm_reg & 7) << 3;
> + pvex->reg = 0xf;
> + opc[2] = 0xc3;
> +
> + invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
> +
> + pvex->pfx = vex_f3; /* vmovdqu */
> + /* Switch to mask_reg as destination. */
> + pvex->r = !mode_64bit() || !(mask_reg & 8);
> + opc[1] = (mask_reg & 7) << 3;
> +
> + invoke_stub("", "", "+m" (mask) : "a" (&mask));
> + put_stub(stub);
> +
> + state->simd_size = simd_none;
> + break;
> + }
> +
> case X86EMUL_OPC_VEX_66(0x0f38, 0x96): /* vfmaddsub132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
> case X86EMUL_OPC_VEX_66(0x0f38, 0x97): /* vfmsubadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
> case X86EMUL_OPC_VEX_66(0x0f38, 0x98): /* vfmadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
> --- a/xen/arch/x86/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate.c
> @@ -10,6 +10,7 @@
> */
>
> #include <xen/domain_page.h>
> +#include <xen/event.h>
Spurious hunk?
~Andrew
> #include <asm/x86_emulate.h>
> #include <asm/asm_defns.h> /* mark_regs_dirty() */
> #include <asm/processor.h> /* current_cpu_info */
>
>
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel
next prev parent reply other threads:[~2018-02-01 20:53 UTC|newest]
Thread overview: 85+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-12-07 13:49 [PATCH v3 00/25] x86: emulator enhancements Jan Beulich
2017-12-07 13:58 ` [PATCH v3 01/25] x86emul: make decode_register() return unsigned long * Jan Beulich
2017-12-07 18:32 ` Andrew Cooper
2017-12-08 7:44 ` Jan Beulich
2017-12-07 13:59 ` [PATCH v3 02/25] x86emul: build SIMD tests with -Os Jan Beulich
2017-12-07 18:32 ` Andrew Cooper
2017-12-07 14:00 ` [PATCH v3 03/25] x86emul: support F16C insns Jan Beulich
2018-01-31 18:58 ` Andrew Cooper
2017-12-07 14:01 ` [PATCH v3 04/25] x86emul: support FMA4 insns Jan Beulich
2018-01-31 19:51 ` Andrew Cooper
2017-12-07 14:02 ` [PATCH v3 05/25] x86emul: support FMA insns Jan Beulich
2018-02-01 16:15 ` Andrew Cooper
2017-12-07 14:03 ` [PATCH v3 06/25] x86emul: support most remaining AVX2 insns Jan Beulich
2018-02-01 19:45 ` Andrew Cooper
2018-02-02 9:29 ` Jan Beulich
2017-12-07 14:03 ` [PATCH v3 07/25] x86emul: support AVX2 gather insns Jan Beulich
2018-02-01 20:53 ` Andrew Cooper [this message]
2018-02-02 9:44 ` Jan Beulich
2017-12-07 14:04 ` [PATCH v3 08/25] x86emul: add tables for XOP 08 and 09 extension spaces Jan Beulich
2018-02-02 11:43 ` Andrew Cooper
2018-02-02 15:15 ` Jan Beulich
2018-02-02 16:02 ` Andrew Cooper
2017-12-07 14:04 ` [PATCH v3 09/25] x86emul: support XOP insns Jan Beulich
2018-02-02 12:03 ` Andrew Cooper
2018-02-02 15:17 ` Jan Beulich
2018-02-05 13:01 ` Andrew Cooper
2017-12-07 14:05 ` [PATCH v3 10/25] x86emul: support 3DNow! insns Jan Beulich
2018-02-02 13:02 ` Andrew Cooper
2018-02-02 15:22 ` Jan Beulich
2018-02-02 16:04 ` Andrew Cooper
2017-12-07 14:06 ` [PATCH v3 11/25] x86emul: place test blobs in executable section Jan Beulich
2018-02-02 13:03 ` Andrew Cooper
2018-02-02 15:27 ` Jan Beulich
2018-02-05 13:11 ` Andrew Cooper
2018-02-05 13:55 ` Jan Beulich
2017-12-07 14:07 ` [PATCH v3 12/25] x86emul: abstract out XCRn accesses Jan Beulich
2018-02-02 13:29 ` Andrew Cooper
2018-02-02 17:05 ` Jan Beulich
2017-12-07 14:08 ` [PATCH v3 13/25] x86emul: adjust_bnd() should check XCR0 Jan Beulich
2018-02-02 13:30 ` Andrew Cooper
2018-02-02 16:19 ` Jan Beulich
2018-02-02 16:28 ` Andrew Cooper
2017-12-07 14:09 ` [PATCH v3 14/25] x86emul: make all FPU emulation use the stub Jan Beulich
2018-02-02 13:37 ` Andrew Cooper
2017-12-07 14:10 ` [PATCH v3 15/25] x86/HVM: eliminate custom #MF/#XM handling Jan Beulich
2018-02-02 13:38 ` Andrew Cooper
2017-12-07 14:11 ` [PATCH v3 16/25] x86emul: support SWAPGS Jan Beulich
2018-02-02 13:41 ` Andrew Cooper
2018-02-02 16:24 ` Jan Beulich
2017-12-07 14:11 ` [PATCH v3 17/25] x86emul: emulate {MONITOR, MWAIT}{, X} as no-op Jan Beulich
2018-02-02 14:05 ` Andrew Cooper
2017-12-07 14:12 ` [PATCH v3 18/25] x86emul: add missing suffixes in test harness Jan Beulich
2018-02-02 14:13 ` Andrew Cooper
2017-12-07 14:14 ` [PATCH v3 19/25] x86emul: tell cmpxchg hook whether LOCK is in effect Jan Beulich
2017-12-08 10:58 ` Paul Durrant
2018-02-02 14:13 ` Andrew Cooper
2017-12-07 14:15 ` [PATCH v3 20/25] x86emul: correctly handle CMPXCHG* comparison failures Jan Beulich
2018-02-02 14:49 ` Andrew Cooper
2018-02-05 8:07 ` Jan Beulich
2018-02-05 13:38 ` Andrew Cooper
2017-12-07 14:16 ` [PATCH v3 21/25] x86emul: add read-modify-write hook Jan Beulich
2018-02-02 16:13 ` Andrew Cooper
2018-02-05 8:22 ` Jan Beulich
2018-02-05 14:21 ` Andrew Cooper
2018-02-05 14:56 ` Jan Beulich
2017-12-07 14:16 ` [PATCH v3 22/25] x86/HVM: do actual CMPXCHG in hvmemul_cmpxchg() Jan Beulich
2017-12-07 14:38 ` Razvan Cojocaru
2017-12-08 10:38 ` Paul Durrant
2018-02-02 16:36 ` Andrew Cooper
2018-02-05 8:32 ` Jan Beulich
2018-02-05 16:09 ` Andrew Cooper
2018-02-05 16:49 ` Jan Beulich
2018-02-05 16:57 ` Andrew Cooper
2018-02-05 17:05 ` Jan Beulich
2017-12-07 14:17 ` [PATCH v3 23/25] x86/HVM: make use of new read-modify-write emulator hook Jan Beulich
2017-12-08 10:41 ` Paul Durrant
2018-02-02 16:37 ` Andrew Cooper
2018-02-05 8:34 ` Jan Beulich
2018-02-05 16:15 ` Andrew Cooper
2017-12-07 14:18 ` [PATCH v3 24/25] x86/shadow: fully move unmap-dest into common code Jan Beulich
2018-02-02 16:46 ` Andrew Cooper
2017-12-07 14:19 ` [PATCH v3 25/25] x86/shadow: fold sh_x86_emulate_{write, cmpxchg}() into their only callers Jan Beulich
2018-02-02 16:52 ` Andrew Cooper
2018-02-05 8:42 ` Jan Beulich
2018-02-05 12:16 ` Tim Deegan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=fe6aaaff-45f2-305c-e589-a24b694fc278@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=George.Dunlap@eu.citrix.com \
--cc=JBeulich@suse.com \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).