All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Jan Bobek <jan.bobek@gmail.com>
Cc: Richard Henderson <richard.henderson@linaro.org>, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [RISU v3 09/11] i386: Add avx512 state to reginfo_t
Date: Fri, 24 May 2019 10:29:39 +0100	[thread overview]
Message-ID: <87blzs19q4.fsf@zen.linaroharston> (raw)
In-Reply-To: <20190523204409.21068-10-jan.bobek@gmail.com>


Jan Bobek <jan.bobek@gmail.com> writes:

> From: Richard Henderson <richard.henderson@linaro.org>
>
> The state expected for a given test must be specifically requested
> with the --xfeatures=mask command-line argument.  This is recorded
> with the saved state so that it is obvious if the apprentice is given
> a different argument.  Any features beyond what are present on the
> running cpu will read as zero.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>


> ---
>  risu_reginfo_i386.h |  14 +++
>  risu_reginfo_i386.c | 228 ++++++++++++++++++++++++++++++++++++++++++--
>  test_i386.S         |  39 ++++++++
>  3 files changed, 273 insertions(+), 8 deletions(-)
>
> diff --git a/risu_reginfo_i386.h b/risu_reginfo_i386.h
> index e350f01..b468f79 100644
> --- a/risu_reginfo_i386.h
> +++ b/risu_reginfo_i386.h
> @@ -12,6 +12,10 @@
>  #ifndef RISU_REGINFO_I386_H
>  #define RISU_REGINFO_I386_H
>
> +struct avx512_reg {
> +    uint64_t q[8];
> +};
> +
>  /*
>   * This is the data structure we pass over the socket.
>   * It is a simplified and reduced subset of what can
> @@ -19,7 +23,17 @@
>   */
>  struct reginfo {
>      uint32_t faulting_insn;
> +    uint32_t mxcsr;
> +    uint64_t xfeatures;
> +
>      gregset_t gregs;
> +
> +#ifdef __x86_64__
> +    struct avx512_reg vregs[32];
> +#else
> +    struct avx512_reg vregs[8];
> +#endif
> +    uint64_t kregs[8];
>  };
>
>  /*
> diff --git a/risu_reginfo_i386.c b/risu_reginfo_i386.c
> index c4dc14a..83f9541 100644
> --- a/risu_reginfo_i386.c
> +++ b/risu_reginfo_i386.c
> @@ -11,19 +11,32 @@
>
>  #include <stdio.h>
>  #include <stdlib.h>
> +#include <stddef.h>
>  #include <string.h>
>  #include <ucontext.h>
>  #include <assert.h>
> +#include <cpuid.h>
>
>  #include "risu.h"
>  #include "risu_reginfo_i386.h"
>
> -const struct option * const arch_long_opts;
> -const char * const arch_extra_help;
> +#include <asm/sigcontext.h>
> +
> +static uint64_t xfeatures = 3;  /* SSE */
> +
> +static const struct option extra_ops[] = {
> +    {"xfeatures", required_argument, NULL, FIRST_ARCH_OPT },
> +    {0, 0, 0, 0}
> +};
> +
> +const struct option * const arch_long_opts = extra_ops;
> +const char * const arch_extra_help
> +    = "  --xfeatures=<mask>  Use features in mask for XSAVE\n";
>
>  void process_arch_opt(int opt, const char *arg)
>  {
> -    abort();
> +    assert(opt == FIRST_ARCH_OPT);
> +    xfeatures = strtoull(arg, 0, 0);
>  }
>
>  const int reginfo_size(void)
> @@ -31,13 +44,37 @@ const int reginfo_size(void)
>      return sizeof(struct reginfo);
>  }
>
> +static void *xsave_feature_buf(struct _xstate *xs, int feature)
> +{
> +    unsigned int eax, ebx, ecx, edx;
> +    int ok;
> +
> +    /*
> +     * Get the location of the XSAVE feature from the cpuid leaf.
> +     * Given that we know the xfeature bit is set, this must succeed.
> +     */
> +    ok = __get_cpuid_count(0xd, feature, &eax, &ebx, &ecx, &edx);
> +    assert(ok);
> +
> +    /* Sanity check that the frame stored by the kernel contains the data. */
> +    assert(xs->fpstate.sw_reserved.extended_size >= eax + ebx);
> +
> +    return (void *)xs + ebx;
> +}
> +
>  /* reginfo_init: initialize with a ucontext */
>  void reginfo_init(struct reginfo *ri, ucontext_t *uc)
>  {
> -    int i;
> +    int i, nvecregs;
> +    struct _fpstate *fp;
> +    struct _xstate *xs;
> +    uint64_t features;
>
>      memset(ri, 0, sizeof(*ri));
>
> +    /* Require master and apprentice to be given the same arguments.  */
> +    ri->xfeatures = xfeatures;
> +
>      for (i = 0; i < NGREG; i++) {
>          switch (i) {
>          case REG_E(IP):
> @@ -79,12 +116,89 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
>       * distinguish 'do compare' from 'stop'.
>       */
>      ri->faulting_insn = *(uint32_t *)uc->uc_mcontext.gregs[REG_E(IP)];
> +
> +    /*
> +     * FP state is omitted if unused (aka in init state).
> +     * Use the <asm/sigcontext.h> struct for access to AVX state.
> +     */
> +
> +    fp = (struct _fpstate *)uc->uc_mcontext.fpregs;
> +    if (fp == NULL) {
> +        return;
> +    }
> +
> +#ifdef __x86_64__
> +    nvecregs = 16;
> +#else
> +    /* We don't (currently) care about the 80387 state, only SSE+.  */
> +    if (fp->magic != X86_FXSR_MAGIC) {
> +        return;
> +    }
> +    nvecregs = 8;
> +#endif
> +
> +    /*
> +     * Now we know that _fpstate contains FXSAVE data.
> +     */
> +    ri->mxcsr = fp->mxcsr;
> +
> +    for (i = 0; i < nvecregs; ++i) {
> +#ifdef __x86_64__
> +        memcpy(&ri->vregs[i], &fp->xmm_space[i * 4], 16);
> +#else
> +        memcpy(&ri->vregs[i], &fp->_xmm[i], 16);
> +#endif
> +    }
> +
> +    if (fp->sw_reserved.magic1 != FP_XSTATE_MAGIC1) {
> +        return;
> +    }
> +    xs = (struct _xstate *)fp;
> +    features = xfeatures & xs->xstate_hdr.xfeatures;
> +
> +    /*
> +     * Now we know that _fpstate contains XSAVE data.
> +     */
> +
> +    if (features & (1 << 2)) {
> +        /* YMM_Hi128 state */
> +        void *buf = xsave_feature_buf(xs, 2);
> +        for (i = 0; i < nvecregs; ++i) {
> +            memcpy(&ri->vregs[i].q[2], buf + 16 * i, 16);
> +        }
> +    }
> +
> +    if (features & (1 << 5)) {
> +        /* Opmask state */
> +        uint64_t *buf = xsave_feature_buf(xs, 5);
> +        for (i = 0; i < 8; ++i) {
> +            ri->kregs[i] = buf[i];
> +        }
> +    }
> +
> +    if (features & (1 << 6)) {
> +        /* ZMM_Hi256 state */
> +        void *buf = xsave_feature_buf(xs, 6);
> +        for (i = 0; i < nvecregs; ++i) {
> +            memcpy(&ri->vregs[i].q[4], buf + 32 * i, 32);
> +        }
> +    }
> +
> +#ifdef __x86_64__
> +    if (features & (1 << 7)) {
> +        /* Hi16_ZMM state */
> +        void *buf = xsave_feature_buf(xs, 7);
> +        for (i = 0; i < 16; ++i) {
> +            memcpy(&ri->vregs[i + 16], buf + 64 * i, 64);
> +        }
> +    }
> +#endif
>  }
>
>  /* reginfo_is_eq: compare the reginfo structs, returns nonzero if equal */
>  int reginfo_is_eq(struct reginfo *m, struct reginfo *a)
>  {
> -    return 0 == memcmp(m, a, sizeof(*m));
> +    return !memcmp(m, a, sizeof(*m));
>  }
>
>  static const char *const regname[NGREG] = {
> @@ -126,28 +240,126 @@ static const char *const regname[NGREG] = {
>  # define PRIxREG   "%08x"
>  #endif
>
> +static int get_nvecregs(uint64_t features)
> +{
> +#ifdef __x86_64__
> +    return features & (1 << 7) ? 32 : 16;
> +#else
> +    return 8;
> +#endif
> +}
> +
> +static int get_nvecquads(uint64_t features)
> +{
> +    if (features & (1 << 6)) {
> +        return 8;
> +    } else if (features & (1 << 2)) {
> +        return 4;
> +    } else {
> +        return 2;
> +    }
> +}
> +
> +static char get_vecletter(uint64_t features)
> +{
> +    if (features & (1 << 6 | 1 << 7)) {
> +        return 'z';
> +    } else if (features & (1 << 2)) {
> +        return 'y';
> +    } else {
> +        return 'x';
> +    }
> +}
> +
>  /* reginfo_dump: print state to a stream, returns nonzero on success */
>  int reginfo_dump(struct reginfo *ri, FILE *f)
>  {
> -    int i;
> +    uint64_t features;
> +    int i, j, n, w;
> +    char r;
> +
>      fprintf(f, "  faulting insn %x\n", ri->faulting_insn);
>      for (i = 0; i < NGREG; i++) {
>          if (regname[i]) {
>              fprintf(f, "  %-6s: " PRIxREG "\n", regname[i], ri->gregs[i]);
>          }
>      }
> +
> +    fprintf(f, "  mxcsr : %x\n", ri->mxcsr);
> +    fprintf(f, "  xfeat : %" PRIx64 "\n", ri->xfeatures);
> +
> +    features = ri->xfeatures;
> +    n = get_nvecregs(features);
> +    w = get_nvecquads(features);
> +    r = get_vecletter(features);
> +
> +    for (i = 0; i < n; i++) {
> +        fprintf(f, "  %cmm%-3d: ", r, i);
> +        for (j = w - 1; j >= 0; j--) {
> +            fprintf(f, "%016" PRIx64 "%c",
> +                    ri->vregs[i].q[j], j == 0 ? '\n' : ' ');
> +        }
> +    }
> +
> +    if (features & (1 << 5)) {
> +        for (i = 0; i < 8; i++) {
> +            fprintf(f, "  k%-5d: %016" PRIx64 "\n", i, ri->kregs[i]);
> +        }
> +    }
> +
>      return !ferror(f);
>  }
>
>  int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f)
>  {
> -    int i;
> +    int i, j, n, w;
> +    uint64_t features;
> +    char r;
> +
> +    fprintf(f, "Mismatch (master v apprentice):\n");
> +
>      for (i = 0; i < NGREG; i++) {
>          if (m->gregs[i] != a->gregs[i]) {
>              assert(regname[i]);
> -            fprintf(f, "Mismatch: %s: " PRIxREG " v " PRIxREG "\n",
> +            fprintf(f, "  %-6s: " PRIxREG " v " PRIxREG "\n",
>                      regname[i], m->gregs[i], a->gregs[i]);
>          }
>      }
> +
> +    if (m->mxcsr != a->mxcsr) {
> +        fprintf(f, "  mxcsr : %x v %x\n", m->mxcsr, a->mxcsr);
> +    }
> +    if (m->xfeatures != a->xfeatures) {
> +        fprintf(f, "  xfeat : %" PRIx64 " v %" PRIx64 "\n",
> +                m->xfeatures, a->xfeatures);
> +    }
> +
> +    features = m->xfeatures;
> +    n = get_nvecregs(features);
> +    w = get_nvecquads(features);
> +    r = get_vecletter(features);
> +
> +    for (i = 0; i < n; i++) {
> +        if (memcmp(&m->vregs[i], &a->vregs[i], w * 8)) {
> +            fprintf(f, "  %cmm%-3d: ", r, i);
> +            for (j = w - 1; j >= 0; j--) {
> +                fprintf(f, "%016" PRIx64 "%c",
> +                        m->vregs[i].q[j], j == 0 ? '\n' : ' ');
> +            }
> +            fprintf(f, "       v: ");
> +            for (j = w - 1; j >= 0; j--) {
> +                fprintf(f, "%016" PRIx64 "%c",
> +                        a->vregs[i].q[j], j == 0 ? '\n' : ' ');
> +            }
> +        }
> +    }
> +
> +    for (i = 0; i < 8; i++) {
> +        if (m->kregs[i] != a->kregs[i]) {
> +            fprintf(f, "  k%-5d: %016" PRIx64 " v %016" PRIx64 "\n",
> +                    i, m->kregs[i], a->kregs[i]);
> +        }
> +    }
> +
>      return !ferror(f);
>  }
> diff --git a/test_i386.S b/test_i386.S
> index 456b99c..05344d7 100644
> --- a/test_i386.S
> +++ b/test_i386.S
> @@ -12,6 +12,37 @@
>  /* A trivial test image for x86 */
>
>  /* Initialise the registers to avoid spurious mismatches */
> +
> +#ifdef __x86_64__
> +#define BASE	%rax
> +	lea	2f(%rip), BASE
> +#else
> +#define BASE	%eax
> +	call	1f
> +1:	pop	BASE
> +	add	$2f-1b, BASE
> +#endif
> +
> +	movdqa	0(BASE), %xmm0
> +	movdqa	1*16(BASE), %xmm1
> +	movdqa	2*16(BASE), %xmm2
> +	movdqa	3*16(BASE), %xmm3
> +	movdqa	4*16(BASE), %xmm4
> +	movdqa	5*16(BASE), %xmm5
> +	movdqa	6*16(BASE), %xmm6
> +	movdqa	7*16(BASE), %xmm7
> +
> +#ifdef __x86_64__
> +	movdqa	8*16(BASE), %xmm8
> +	movdqa	9*16(BASE), %xmm9
> +	movdqa	10*16(BASE), %xmm10
> +	movdqa	11*16(BASE), %xmm11
> +	movdqa	12*16(BASE), %xmm12
> +	movdqa	13*16(BASE), %xmm13
> +	movdqa	14*16(BASE), %xmm14
> +	movdqa	15*16(BASE), %xmm15
> +#endif
> +
>  	xor	%eax, %eax
>  	sahf				/* init eflags */
>
> @@ -39,3 +70,11 @@
>
>  /* exit test */
>  	ud1	%ecx, %eax
> +
> +	.p2align 16
> +2:
> +	.set	i, 0
> +	.rept	256
> +	.byte	i
> +	.set	i, i + 1
> +	.endr


--
Alex Bennée


  reply	other threads:[~2019-05-24  9:30 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-05-23 20:43 [Qemu-devel] [RISU v3 00/11] Support for i386/x86_64 with vector extensions Jan Bobek
2019-05-23 20:43 ` [Qemu-devel] [RISU v3 01/11] Makefile: undefine the arch name symbol Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 02/11] risu_i386: move reginfo_t and related defines to risu_reginfo_i386.h Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 03/11] risu_i386: move reginfo-related code to risu_reginfo_i386.c Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 04/11] risu_reginfo_i386: implement arch-specific reginfo interface Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 05/11] risu_i386: implement missing CPU-specific functions Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 06/11] risu_i386: remove old unused code Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 07/11] test_i386: change syntax from nasm to gas Jan Bobek
2019-05-24  9:28   ` Alex Bennée
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 08/11] configure: add i386/x86_64 architectures Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 09/11] i386: Add avx512 state to reginfo_t Jan Bobek
2019-05-24  9:29   ` Alex Bennée [this message]
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 10/11] risu_reginfo_i386: replace xfeature constants with symbolic names Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 11/11] risu_reginfo_i386: rework --xfeatures value parsing Jan Bobek
2019-05-24  9:32   ` Alex Bennée
2019-05-24  9:27 ` [Qemu-devel] [RISU PATCH] build-all-arches: include x86 triplets in the build Alex Bennée
2019-05-24  9:42 ` [Qemu-devel] [RISU v3 00/11] Support for i386/x86_64 with vector extensions Alex Bennée
2019-06-07 10:07   ` Peter Maydell
2019-06-07 11:58     ` Alex Bennée
2019-06-07 13:36       ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87blzs19q4.fsf@zen.linaroharston \
    --to=alex.bennee@linaro.org \
    --cc=jan.bobek@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.