xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
	xen-devel <xen-devel@lists.xenproject.org>
Cc: George Dunlap <George.Dunlap@eu.citrix.com>
Subject: Re: [PATCH v3 07/25] x86emul: support AVX2 gather insns
Date: Thu, 1 Feb 2018 20:53:47 +0000	[thread overview]
Message-ID: <fe6aaaff-45f2-305c-e589-a24b694fc278@citrix.com> (raw)
In-Reply-To: <5A29584702000078001958A3@prv-mh.provo.novell.com>

On 07/12/17 14:03, Jan Beulich wrote:
> --- a/xen/arch/x86/x86_emulate/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c
> @@ -391,6 +391,7 @@ static const struct {
>      [0x78 ... 0x79] = { .simd_size = simd_other, .two_op = 1 },
>      [0x8c] = { .simd_size = simd_other },
>      [0x8e] = { .simd_size = simd_other, .to_mem = 1 },
> +    [0x90 ... 0x93] = { .simd_size = simd_other, .vsib = 1 },
>      [0x96 ... 0x9f] = { .simd_size = simd_packed_fp },
>      [0xa6 ... 0xaf] = { .simd_size = simd_packed_fp },
>      [0xb6 ... 0xbf] = { .simd_size = simd_packed_fp },
> @@ -598,6 +599,7 @@ struct x86_emulate_state {
>          ext_8f0a,
>      } ext;
>      uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
> +    uint8_t sib_index, sib_scale;
>      uint8_t rex_prefix;
>      bool lock_prefix;
>      bool not_64bit; /* Instruction not available in 64bit. */
> @@ -2411,7 +2413,7 @@ x86_decode(
>      struct x86_emulate_ctxt *ctxt,
>      const struct x86_emulate_ops  *ops)
>  {
> -    uint8_t b, d, sib, sib_index, sib_base;
> +    uint8_t b, d;
>      unsigned int def_op_bytes, def_ad_bytes, opcode;
>      enum x86_segment override_seg = x86_seg_none;
>      bool pc_rel = false;
> @@ -2745,6 +2747,7 @@ x86_decode(
>  
>          if ( modrm_mod == 3 )
>          {
> +            generate_exception_if(d & vSIB, EXC_UD);
>              modrm_rm |= (rex_prefix & 1) << 3;
>              ea.type = OP_REG;
>          }
> @@ -2805,13 +2808,17 @@ x86_decode(
>              ea.type = OP_MEM;
>              if ( modrm_rm == 4 )
>              {
> -                sib = insn_fetch_type(uint8_t);
> -                sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
> -                sib_base  = (sib & 7) | ((rex_prefix << 3) & 8);
> -                if ( sib_index != 4 && !(d & vSIB) )
> -                    ea.mem.off = *decode_register(sib_index, state->regs,
> -                                                  false);
> -                ea.mem.off <<= (sib >> 6) & 3;
> +                uint8_t sib = insn_fetch_type(uint8_t);
> +                uint8_t sib_base = (sib & 7) | ((rex_prefix << 3) & 8);
> +
> +                state->sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
> +                state->sib_scale = (sib >> 6) & 3;
> +                if ( state->sib_index != 4 && !(d & vSIB) )
> +                {
> +                    ea.mem.off = *decode_register(state->sib_index,
> +                                                  state->regs, false);
> +                    ea.mem.off <<= state->sib_scale;

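This is a functional change: the shift used to be applied to ea.mem.off
unconditionally, whereas it is now only applied when an index register
is actually read.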

> +                }
>                  if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
>                      ea.mem.off += insn_fetch_type(int32_t);
>                  else if ( sib_base == 4 )
> @@ -7472,6 +7479,110 @@ x86_emulate(
>          break;
>      }
>  
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x90): /* vpgatherd{d,q} {x,y}mm,mem,{x,y}mm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x91): /* vpgatherq{d,q} {x,y}mm,mem,{x,y}mm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x92): /* vgatherdp{s,d} {x,y}mm,mem,{x,y}mm */
> +    case X86EMUL_OPC_VEX_66(0x0f38, 0x93): /* vgatherqp{s,d} {x,y}mm,mem,{x,y}mm */
> +    {
> +        unsigned int mask_reg = ~vex.reg & (mode_64bit() ? 0xf : 7);
> +        typeof(vex) *pvex;
> +        union {
> +            int32_t dw[8];
> +            int64_t qw[4];
> +        } index, mask;
> +
> +        ASSERT(ea.type == OP_MEM);
> +        generate_exception_if(modrm_reg == state->sib_index ||
> +                              modrm_reg == mask_reg ||
> +                              state->sib_index == mask_reg, EXC_UD);
> +        generate_exception_if(!cpu_has_avx, EXC_UD);
> +        vcpu_must_have(avx2);
> +        get_fpu(X86EMUL_FPU_ymm, &fic);
> +
> +        /* Read destination, index, and mask registers. */
> +        opc = init_prefixes(stub);
> +        pvex = copy_VEX(opc, vex);
> +        pvex->opcx = vex_0f;
> +        opc[0] = 0x7f; /* vmovdqa */
> +        /* Use (%rax) as destination and modrm_reg as source. */
> +        pvex->r = !mode_64bit() || !(modrm_reg & 8);
> +        pvex->b = 1;
> +        opc[1] = (modrm_reg & 7) << 3;
> +        pvex->reg = 0xf;
> +        opc[2] = 0xc3;
> +
> +        invoke_stub("", "", "=m" (*mmvalp) : "a" (mmvalp));
> +
> +        pvex->pfx = vex_f3; /* vmovdqu */
> +        /* Switch to sib_index as source. */
> +        pvex->r = !mode_64bit() || !(state->sib_index & 8);
> +        opc[1] = (state->sib_index & 7) << 3;
> +
> +        invoke_stub("", "", "=m" (index) : "a" (&index));
> +
> +        /* Switch to mask_reg as source. */
> +        pvex->r = !mode_64bit() || !(mask_reg & 8);
> +        opc[1] = (mask_reg & 7) << 3;
> +
> +        invoke_stub("", "", "=m" (mask) : "a" (&mask));
> +        put_stub(stub);
> +
> +        /* Clear untouched parts of the destination and mask values. */
> +        n = 1 << (2 + vex.l - ((b & 1) | vex.w));
> +        op_bytes = 4 << vex.w;
> +        memset((void *)mmvalp + n * op_bytes, 0, 32 - n * op_bytes);
> +        memset((void *)&mask + n * op_bytes, 0, 32 - n * op_bytes);
> +
> +        for ( i = 0; i < n && rc == X86EMUL_OKAY; ++i )
> +        {
> +            if ( (vex.w ? mask.qw[i] : mask.dw[i]) < 0 )
> +            {
> +                signed long idx = b & 1 ? index.qw[i] : index.dw[i];
> +
> +                rc = ops->read(ea.mem.seg,
> +                               ea.mem.off + (idx << state->sib_scale),
> +                               (void *)mmvalp + i * op_bytes, op_bytes, ctxt);
> +                if ( rc != X86EMUL_OKAY )
> +                    break;
> +
> +#ifdef __XEN__
> +                if ( i + 1 < n && local_events_need_delivery() )
> +                    rc = X86EMUL_RETRY;
> +#endif
> +            }
> +
> +            if ( vex.w )
> +                mask.qw[i] = 0;
> +            else
> +                mask.dw[i] = 0;
> +        }

The incomplete case here is rather more complicated.  If rc != OK and
local events are pending, RF needs to be set, although it is not clear
whether this applies only when an exception is pending, or between
every element.

> +
> +        /* Write destination and mask registers. */
> +        opc = init_prefixes(stub);
> +        pvex = copy_VEX(opc, vex);
> +        pvex->opcx = vex_0f;
> +        opc[0] = 0x6f; /* vmovdqa */
> +        /* Use modrm_reg as destination and (%rax) as source. */
> +        pvex->r = !mode_64bit() || !(modrm_reg & 8);
> +        pvex->b = 1;
> +        opc[1] = (modrm_reg & 7) << 3;
> +        pvex->reg = 0xf;
> +        opc[2] = 0xc3;
> +
> +        invoke_stub("", "", "+m" (*mmvalp) : "a" (mmvalp));
> +
> +        pvex->pfx = vex_f3; /* vmovdqu */
> +        /* Switch to mask_reg as destination. */
> +        pvex->r = !mode_64bit() || !(mask_reg & 8);
> +        opc[1] = (mask_reg & 7) << 3;
> +
> +        invoke_stub("", "", "+m" (mask) : "a" (&mask));
> +        put_stub(stub);
> +
> +        state->simd_size = simd_none;
> +        break;
> +    }
> +
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x96): /* vfmaddsub132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x97): /* vfmsubadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
>      case X86EMUL_OPC_VEX_66(0x0f38, 0x98): /* vfmadd132p{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
> --- a/xen/arch/x86/x86_emulate.c
> +++ b/xen/arch/x86/x86_emulate.c
> @@ -10,6 +10,7 @@
>   */
>  
>  #include <xen/domain_page.h>
> +#include <xen/event.h>

Spurious hunk?

~Andrew

>  #include <asm/x86_emulate.h>
>  #include <asm/asm_defns.h> /* mark_regs_dirty() */
>  #include <asm/processor.h> /* current_cpu_info */
>
>


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel

  reply	other threads:[~2018-02-01 20:53 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-12-07 13:49 [PATCH v3 00/25] x86: emulator enhancements Jan Beulich
2017-12-07 13:58 ` [PATCH v3 01/25] x86emul: make decode_register() return unsigned long * Jan Beulich
2017-12-07 18:32   ` Andrew Cooper
2017-12-08  7:44     ` Jan Beulich
2017-12-07 13:59 ` [PATCH v3 02/25] x86emul: build SIMD tests with -Os Jan Beulich
2017-12-07 18:32   ` Andrew Cooper
2017-12-07 14:00 ` [PATCH v3 03/25] x86emul: support F16C insns Jan Beulich
2018-01-31 18:58   ` Andrew Cooper
2017-12-07 14:01 ` [PATCH v3 04/25] x86emul: support FMA4 insns Jan Beulich
2018-01-31 19:51   ` Andrew Cooper
2017-12-07 14:02 ` [PATCH v3 05/25] x86emul: support FMA insns Jan Beulich
2018-02-01 16:15   ` Andrew Cooper
2017-12-07 14:03 ` [PATCH v3 06/25] x86emul: support most remaining AVX2 insns Jan Beulich
2018-02-01 19:45   ` Andrew Cooper
2018-02-02  9:29     ` Jan Beulich
2017-12-07 14:03 ` [PATCH v3 07/25] x86emul: support AVX2 gather insns Jan Beulich
2018-02-01 20:53   ` Andrew Cooper [this message]
2018-02-02  9:44     ` Jan Beulich
2017-12-07 14:04 ` [PATCH v3 08/25] x86emul: add tables for XOP 08 and 09 extension spaces Jan Beulich
2018-02-02 11:43   ` Andrew Cooper
2018-02-02 15:15     ` Jan Beulich
2018-02-02 16:02       ` Andrew Cooper
2017-12-07 14:04 ` [PATCH v3 09/25] x86emul: support XOP insns Jan Beulich
2018-02-02 12:03   ` Andrew Cooper
2018-02-02 15:17     ` Jan Beulich
2018-02-05 13:01       ` Andrew Cooper
2017-12-07 14:05 ` [PATCH v3 10/25] x86emul: support 3DNow! insns Jan Beulich
2018-02-02 13:02   ` Andrew Cooper
2018-02-02 15:22     ` Jan Beulich
2018-02-02 16:04       ` Andrew Cooper
2017-12-07 14:06 ` [PATCH v3 11/25] x86emul: place test blobs in executable section Jan Beulich
2018-02-02 13:03   ` Andrew Cooper
2018-02-02 15:27     ` Jan Beulich
2018-02-05 13:11       ` Andrew Cooper
2018-02-05 13:55         ` Jan Beulich
2017-12-07 14:07 ` [PATCH v3 12/25] x86emul: abstract out XCRn accesses Jan Beulich
2018-02-02 13:29   ` Andrew Cooper
2018-02-02 17:05     ` Jan Beulich
2017-12-07 14:08 ` [PATCH v3 13/25] x86emul: adjust_bnd() should check XCR0 Jan Beulich
2018-02-02 13:30   ` Andrew Cooper
2018-02-02 16:19     ` Jan Beulich
2018-02-02 16:28       ` Andrew Cooper
2017-12-07 14:09 ` [PATCH v3 14/25] x86emul: make all FPU emulation use the stub Jan Beulich
2018-02-02 13:37   ` Andrew Cooper
2017-12-07 14:10 ` [PATCH v3 15/25] x86/HVM: eliminate custom #MF/#XM handling Jan Beulich
2018-02-02 13:38   ` Andrew Cooper
2017-12-07 14:11 ` [PATCH v3 16/25] x86emul: support SWAPGS Jan Beulich
2018-02-02 13:41   ` Andrew Cooper
2018-02-02 16:24     ` Jan Beulich
2017-12-07 14:11 ` [PATCH v3 17/25] x86emul: emulate {MONITOR, MWAIT}{, X} as no-op Jan Beulich
2018-02-02 14:05   ` Andrew Cooper
2017-12-07 14:12 ` [PATCH v3 18/25] x86emul: add missing suffixes in test harness Jan Beulich
2018-02-02 14:13   ` Andrew Cooper
2017-12-07 14:14 ` [PATCH v3 19/25] x86emul: tell cmpxchg hook whether LOCK is in effect Jan Beulich
2017-12-08 10:58   ` Paul Durrant
2018-02-02 14:13   ` Andrew Cooper
2017-12-07 14:15 ` [PATCH v3 20/25] x86emul: correctly handle CMPXCHG* comparison failures Jan Beulich
2018-02-02 14:49   ` Andrew Cooper
2018-02-05  8:07     ` Jan Beulich
2018-02-05 13:38       ` Andrew Cooper
2017-12-07 14:16 ` [PATCH v3 21/25] x86emul: add read-modify-write hook Jan Beulich
2018-02-02 16:13   ` Andrew Cooper
2018-02-05  8:22     ` Jan Beulich
2018-02-05 14:21       ` Andrew Cooper
2018-02-05 14:56         ` Jan Beulich
2017-12-07 14:16 ` [PATCH v3 22/25] x86/HVM: do actual CMPXCHG in hvmemul_cmpxchg() Jan Beulich
2017-12-07 14:38   ` Razvan Cojocaru
2017-12-08 10:38   ` Paul Durrant
2018-02-02 16:36   ` Andrew Cooper
2018-02-05  8:32     ` Jan Beulich
2018-02-05 16:09       ` Andrew Cooper
2018-02-05 16:49         ` Jan Beulich
2018-02-05 16:57           ` Andrew Cooper
2018-02-05 17:05             ` Jan Beulich
2017-12-07 14:17 ` [PATCH v3 23/25] x86/HVM: make use of new read-modify-write emulator hook Jan Beulich
2017-12-08 10:41   ` Paul Durrant
2018-02-02 16:37   ` Andrew Cooper
2018-02-05  8:34     ` Jan Beulich
2018-02-05 16:15       ` Andrew Cooper
2017-12-07 14:18 ` [PATCH v3 24/25] x86/shadow: fully move unmap-dest into common code Jan Beulich
2018-02-02 16:46   ` Andrew Cooper
2017-12-07 14:19 ` [PATCH v3 25/25] x86/shadow: fold sh_x86_emulate_{write, cmpxchg}() into their only callers Jan Beulich
2018-02-02 16:52   ` Andrew Cooper
2018-02-05  8:42     ` Jan Beulich
2018-02-05 12:16       ` Tim Deegan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fe6aaaff-45f2-305c-e589-a24b694fc278@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=George.Dunlap@eu.citrix.com \
    --cc=JBeulich@suse.com \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).