From: "Alex Bennée" <alex.bennee@linaro.org>
To: Jan Bobek <jan.bobek@gmail.com>
Cc: Richard Henderson <richard.henderson@linaro.org>, qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [RISU v3 09/11] i386: Add avx512 state to reginfo_t
Date: Fri, 24 May 2019 10:29:39 +0100 [thread overview]
Message-ID: <87blzs19q4.fsf@zen.linaroharston> (raw)
In-Reply-To: <20190523204409.21068-10-jan.bobek@gmail.com>
Jan Bobek <jan.bobek@gmail.com> writes:
> From: Richard Henderson <richard.henderson@linaro.org>
>
> The state expected for a given test must be specifically requested
> with the --xfeatures=mask command-line argument. This is recorded
> with the saved state so that it is obvious if the apprentice is given
> a different argument. Any features beyond what are present on the
> running cpu will read as zero.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> ---
> risu_reginfo_i386.h | 14 +++
> risu_reginfo_i386.c | 228 ++++++++++++++++++++++++++++++++++++++++++--
> test_i386.S | 39 ++++++++
> 3 files changed, 273 insertions(+), 8 deletions(-)
>
> diff --git a/risu_reginfo_i386.h b/risu_reginfo_i386.h
> index e350f01..b468f79 100644
> --- a/risu_reginfo_i386.h
> +++ b/risu_reginfo_i386.h
> @@ -12,6 +12,10 @@
> #ifndef RISU_REGINFO_I386_H
> #define RISU_REGINFO_I386_H
>
> +struct avx512_reg {
> + uint64_t q[8];
> +};
> +
> /*
> * This is the data structure we pass over the socket.
> * It is a simplified and reduced subset of what can
> @@ -19,7 +23,17 @@
> */
> struct reginfo {
> uint32_t faulting_insn;
> + uint32_t mxcsr;
> + uint64_t xfeatures;
> +
> gregset_t gregs;
> +
> +#ifdef __x86_64__
> + struct avx512_reg vregs[32];
> +#else
> + struct avx512_reg vregs[8];
> +#endif
> + uint64_t kregs[8];
> };
>
> /*
> diff --git a/risu_reginfo_i386.c b/risu_reginfo_i386.c
> index c4dc14a..83f9541 100644
> --- a/risu_reginfo_i386.c
> +++ b/risu_reginfo_i386.c
> @@ -11,19 +11,32 @@
>
> #include <stdio.h>
> #include <stdlib.h>
> +#include <stddef.h>
> #include <string.h>
> #include <ucontext.h>
> #include <assert.h>
> +#include <cpuid.h>
>
> #include "risu.h"
> #include "risu_reginfo_i386.h"
>
> -const struct option * const arch_long_opts;
> -const char * const arch_extra_help;
> +#include <asm/sigcontext.h>
> +
> +static uint64_t xfeatures = 3; /* SSE */
> +
> +static const struct option extra_ops[] = {
> + {"xfeatures", required_argument, NULL, FIRST_ARCH_OPT },
> + {0, 0, 0, 0}
> +};
> +
> +const struct option * const arch_long_opts = extra_ops;
> +const char * const arch_extra_help
> + = " --xfeatures=<mask> Use features in mask for XSAVE\n";
>
> void process_arch_opt(int opt, const char *arg)
> {
> - abort();
> + assert(opt == FIRST_ARCH_OPT);
> + xfeatures = strtoull(arg, 0, 0);
> }
>
> const int reginfo_size(void)
> @@ -31,13 +44,37 @@ const int reginfo_size(void)
> return sizeof(struct reginfo);
> }
>
> +static void *xsave_feature_buf(struct _xstate *xs, int feature)
> +{
> + unsigned int eax, ebx, ecx, edx;
> + int ok;
> +
> + /*
> + * Get the location of the XSAVE feature from the cpuid leaf.
> + * Given that we know the xfeature bit is set, this must succeed.
> + */
> + ok = __get_cpuid_count(0xd, feature, &eax, &ebx, &ecx, &edx);
> + assert(ok);
> +
> + /* Sanity check that the frame stored by the kernel contains the data. */
> + assert(xs->fpstate.sw_reserved.extended_size >= eax + ebx);
> +
> + return (void *)xs + ebx;
> +}
> +
> /* reginfo_init: initialize with a ucontext */
> void reginfo_init(struct reginfo *ri, ucontext_t *uc)
> {
> - int i;
> + int i, nvecregs;
> + struct _fpstate *fp;
> + struct _xstate *xs;
> + uint64_t features;
>
> memset(ri, 0, sizeof(*ri));
>
> + /* Require master and apprentice to be given the same arguments. */
> + ri->xfeatures = xfeatures;
> +
> for (i = 0; i < NGREG; i++) {
> switch (i) {
> case REG_E(IP):
> @@ -79,12 +116,89 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
> * distinguish 'do compare' from 'stop'.
> */
> ri->faulting_insn = *(uint32_t *)uc->uc_mcontext.gregs[REG_E(IP)];
> +
> + /*
> + * FP state is omitted if unused (aka in init state).
> + * Use the <asm/sigcontext.h> struct for access to AVX state.
> + */
> +
> + fp = (struct _fpstate *)uc->uc_mcontext.fpregs;
> + if (fp == NULL) {
> + return;
> + }
> +
> +#ifdef __x86_64__
> + nvecregs = 16;
> +#else
> + /* We don't (currently) care about the 80387 state, only SSE+. */
> + if (fp->magic != X86_FXSR_MAGIC) {
> + return;
> + }
> + nvecregs = 8;
> +#endif
> +
> + /*
> + * Now we know that _fpstate contains FXSAVE data.
> + */
> + ri->mxcsr = fp->mxcsr;
> +
> + for (i = 0; i < nvecregs; ++i) {
> +#ifdef __x86_64__
> + memcpy(&ri->vregs[i], &fp->xmm_space[i * 4], 16);
> +#else
> + memcpy(&ri->vregs[i], &fp->_xmm[i], 16);
> +#endif
> + }
> +
> + if (fp->sw_reserved.magic1 != FP_XSTATE_MAGIC1) {
> + return;
> + }
> + xs = (struct _xstate *)fp;
> + features = xfeatures & xs->xstate_hdr.xfeatures;
> +
> + /*
> + * Now we know that _fpstate contains XSAVE data.
> + */
> +
> + if (features & (1 << 2)) {
> + /* YMM_Hi128 state */
> + void *buf = xsave_feature_buf(xs, 2);
> + for (i = 0; i < nvecregs; ++i) {
> + memcpy(&ri->vregs[i].q[2], buf + 16 * i, 16);
> + }
> + }
> +
> + if (features & (1 << 5)) {
> + /* Opmask state */
> + uint64_t *buf = xsave_feature_buf(xs, 5);
> + for (i = 0; i < 8; ++i) {
> + ri->kregs[i] = buf[i];
> + }
> + }
> +
> + if (features & (1 << 6)) {
> + /* ZMM_Hi256 state */
> + void *buf = xsave_feature_buf(xs, 6);
> + for (i = 0; i < nvecregs; ++i) {
> + memcpy(&ri->vregs[i].q[4], buf + 32 * i, 32);
> + }
> + }
> +
> +#ifdef __x86_64__
> + if (features & (1 << 7)) {
> + /* Hi16_ZMM state */
> + void *buf = xsave_feature_buf(xs, 7);
> + for (i = 0; i < 16; ++i) {
> + memcpy(&ri->vregs[i + 16], buf + 64 * i, 64);
> + }
> + }
> +#endif
> }
>
> /* reginfo_is_eq: compare the reginfo structs, returns nonzero if equal */
> int reginfo_is_eq(struct reginfo *m, struct reginfo *a)
> {
> - return 0 == memcmp(m, a, sizeof(*m));
> + return !memcmp(m, a, sizeof(*m));
> }
>
> static const char *const regname[NGREG] = {
> @@ -126,28 +240,126 @@ static const char *const regname[NGREG] = {
> # define PRIxREG "%08x"
> #endif
>
> +static int get_nvecregs(uint64_t features)
> +{
> +#ifdef __x86_64__
> + return features & (1 << 7) ? 32 : 16;
> +#else
> + return 8;
> +#endif
> +}
> +
> +static int get_nvecquads(uint64_t features)
> +{
> + if (features & (1 << 6)) {
> + return 8;
> + } else if (features & (1 << 2)) {
> + return 4;
> + } else {
> + return 2;
> + }
> +}
> +
> +static char get_vecletter(uint64_t features)
> +{
> + if (features & (1 << 6 | 1 << 7)) {
> + return 'z';
> + } else if (features & (1 << 2)) {
> + return 'y';
> + } else {
> + return 'x';
> + }
> +}
> +
> /* reginfo_dump: print state to a stream, returns nonzero on success */
> int reginfo_dump(struct reginfo *ri, FILE *f)
> {
> - int i;
> + uint64_t features;
> + int i, j, n, w;
> + char r;
> +
> fprintf(f, " faulting insn %x\n", ri->faulting_insn);
> for (i = 0; i < NGREG; i++) {
> if (regname[i]) {
> fprintf(f, " %-6s: " PRIxREG "\n", regname[i], ri->gregs[i]);
> }
> }
> +
> + fprintf(f, " mxcsr : %x\n", ri->mxcsr);
> + fprintf(f, " xfeat : %" PRIx64 "\n", ri->xfeatures);
> +
> + features = ri->xfeatures;
> + n = get_nvecregs(features);
> + w = get_nvecquads(features);
> + r = get_vecletter(features);
> +
> + for (i = 0; i < n; i++) {
> + fprintf(f, " %cmm%-3d: ", r, i);
> + for (j = w - 1; j >= 0; j--) {
> + fprintf(f, "%016" PRIx64 "%c",
> + ri->vregs[i].q[j], j == 0 ? '\n' : ' ');
> + }
> + }
> +
> + if (features & (1 << 5)) {
> + for (i = 0; i < 8; i++) {
> + fprintf(f, " k%-5d: %016" PRIx64 "\n", i, ri->kregs[i]);
> + }
> + }
> +
> return !ferror(f);
> }
>
> int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f)
> {
> - int i;
> + int i, j, n, w;
> + uint64_t features;
> + char r;
> +
> + fprintf(f, "Mismatch (master v apprentice):\n");
> +
> for (i = 0; i < NGREG; i++) {
> if (m->gregs[i] != a->gregs[i]) {
> assert(regname[i]);
> - fprintf(f, "Mismatch: %s: " PRIxREG " v " PRIxREG "\n",
> + fprintf(f, " %-6s: " PRIxREG " v " PRIxREG "\n",
> regname[i], m->gregs[i], a->gregs[i]);
> }
> }
> +
> + if (m->mxcsr != a->mxcsr) {
> + fprintf(f, " mxcsr : %x v %x\n", m->mxcsr, a->mxcsr);
> + }
> + if (m->xfeatures != a->xfeatures) {
> + fprintf(f, " xfeat : %" PRIx64 " v %" PRIx64 "\n",
> + m->xfeatures, a->xfeatures);
> + }
> +
> + features = m->xfeatures;
> + n = get_nvecregs(features);
> + w = get_nvecquads(features);
> + r = get_vecletter(features);
> +
> + for (i = 0; i < n; i++) {
> + if (memcmp(&m->vregs[i], &a->vregs[i], w * 8)) {
> + fprintf(f, " %cmm%-3d: ", r, i);
> + for (j = w - 1; j >= 0; j--) {
> + fprintf(f, "%016" PRIx64 "%c",
> + m->vregs[i].q[j], j == 0 ? '\n' : ' ');
> + }
> + fprintf(f, " v: ");
> + for (j = w - 1; j >= 0; j--) {
> + fprintf(f, "%016" PRIx64 "%c",
> + a->vregs[i].q[j], j == 0 ? '\n' : ' ');
> + }
> + }
> + }
> +
> + for (i = 0; i < 8; i++) {
> + if (m->kregs[i] != a->kregs[i]) {
> + fprintf(f, " k%-5d: %016" PRIx64 " v %016" PRIx64 "\n",
> + i, m->kregs[i], a->kregs[i]);
> + }
> + }
> +
> return !ferror(f);
> }
> diff --git a/test_i386.S b/test_i386.S
> index 456b99c..05344d7 100644
> --- a/test_i386.S
> +++ b/test_i386.S
> @@ -12,6 +12,37 @@
> /* A trivial test image for x86 */
>
> /* Initialise the registers to avoid spurious mismatches */
> +
> +#ifdef __x86_64__
> +#define BASE %rax
> + lea 2f(%rip), BASE
> +#else
> +#define BASE %eax
> + call 1f
> +1: pop BASE
> + add $2f-1b, BASE
> +#endif
> +
> + movdqa 0(BASE), %xmm0
> + movdqa 1*16(BASE), %xmm1
> + movdqa 2*16(BASE), %xmm2
> + movdqa 3*16(BASE), %xmm3
> + movdqa 4*16(BASE), %xmm4
> + movdqa 5*16(BASE), %xmm5
> + movdqa 6*16(BASE), %xmm6
> + movdqa 7*16(BASE), %xmm7
> +
> +#ifdef __x86_64__
> + movdqa 8*16(BASE), %xmm8
> + movdqa 9*16(BASE), %xmm9
> + movdqa 10*16(BASE), %xmm10
> + movdqa 11*16(BASE), %xmm11
> + movdqa 12*16(BASE), %xmm12
> + movdqa 13*16(BASE), %xmm13
> + movdqa 14*16(BASE), %xmm14
> + movdqa 15*16(BASE), %xmm15
> +#endif
> +
> xor %eax, %eax
> sahf /* init eflags */
>
> @@ -39,3 +70,11 @@
>
> /* exit test */
> ud1 %ecx, %eax
> +
> + .p2align 16
> +2:
> + .set i, 0
> + .rept 256
> + .byte i
> + .set i, i + 1
> + .endr
--
Alex Bennée
next prev parent reply other threads:[~2019-05-24 9:30 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-23 20:43 [Qemu-devel] [RISU v3 00/11] Support for i386/x86_64 with vector extensions Jan Bobek
2019-05-23 20:43 ` [Qemu-devel] [RISU v3 01/11] Makefile: undefine the arch name symbol Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 02/11] risu_i386: move reginfo_t and related defines to risu_reginfo_i386.h Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 03/11] risu_i386: move reginfo-related code to risu_reginfo_i386.c Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 04/11] risu_reginfo_i386: implement arch-specific reginfo interface Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 05/11] risu_i386: implement missing CPU-specific functions Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 06/11] risu_i386: remove old unused code Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 07/11] test_i386: change syntax from nasm to gas Jan Bobek
2019-05-24 9:28 ` Alex Bennée
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 08/11] configure: add i386/x86_64 architectures Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 09/11] i386: Add avx512 state to reginfo_t Jan Bobek
2019-05-24 9:29 ` Alex Bennée [this message]
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 10/11] risu_reginfo_i386: replace xfeature constants with symbolic names Jan Bobek
2019-05-23 20:44 ` [Qemu-devel] [RISU v3 11/11] risu_reginfo_i386: rework --xfeatures value parsing Jan Bobek
2019-05-24 9:32 ` Alex Bennée
2019-05-24 9:27 ` [Qemu-devel] [RISU PATCH] build-all-arches: include x86 triplets in the build Alex Bennée
2019-05-24 9:42 ` [Qemu-devel] [RISU v3 00/11] Support for i386/x86_64 with vector extensions Alex Bennée
2019-06-07 10:07 ` Peter Maydell
2019-06-07 11:58 ` Alex Bennée
2019-06-07 13:36 ` Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87blzs19q4.fsf@zen.linaroharston \
--to=alex.bennee@linaro.org \
--cc=jan.bobek@gmail.com \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.