From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: Re: [PATCH] x86: AVX instruction emulation fixes Date: Wed, 28 Aug 2013 09:56:44 +0100 Message-ID: <521DBB4C.4030705@citrix.com> References: <521DD1CD02000078000EF000@nat28.tlf.novell.com> <521DB99E.9000204@m2r.biz> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="===============7726225049087649485==" Return-path: Received: from mail6.bemta4.messagelabs.com ([85.158.143.247]) by lists.xen.org with esmtp (Exim 4.72) (envelope-from ) id 1VEbYW-0001pD-Uw for xen-devel@lists.xenproject.org; Wed, 28 Aug 2013 08:56:49 +0000 In-Reply-To: <521DB99E.9000204@m2r.biz> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Fabio Fantoni Cc: xen-devel , Keir Fraser , Jan Beulich List-Id: xen-devel@lists.xenproject.org --===============7726225049087649485== Content-Type: multipart/alternative; boundary="------------060703010004030202090808" --------------060703010004030202090808 Content-Type: text/plain; charset="ISO-8859-1" Content-Transfer-Encoding: 7bit On 28/08/13 09:49, Fabio Fantoni wrote: > Il 28/08/2013 10:32, Jan Beulich ha scritto: >> - we used the C4/C5 (first prefix) byte instead of the apparent ModR/M >> one as the second prefix byte >> - early decoding normalized vex.reg, thus corrupting it for the main >> consumer (copy_REX_VEX()), resulting in #UD on the two-operand >> instructions we emulate >> >> Also add respective test cases to the testing utility plus >> - fix get_fpu() (the fall-through order was inverted) >> - add cpu_has_avx2, even if it's currently unused (as in the new test >> cases I decided to refrain from using AVX2 instructions in order to >> be able to actually run all the tests on the hardware I have) >> - slightly tweak cpu_has_avx to more consistently express the outputs >> we don't care about (sinking them all into the same variable) > > This patch include the solution for full SSE support needed to solve > this problem? > http://bugs.xenproject.org/xen/bug/11 > > Thanks for any reply. No it doesn't. Bug #11 is a problem with the communication between Xen and Qemu. This fixes some bugs with Xens emulation of instructions, irrespective of Qemu. ~Andrew > >> Signed-off-by: Jan Beulich >> >> --- a/tools/tests/x86_emulator/test_x86_emulator.c >> +++ b/tools/tests/x86_emulator/test_x86_emulator.c >> @@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x >> } >> >> #define cpu_has_avx ({ \ >> - unsigned int eax = 1, ecx = 0, edx; \ >> - cpuid(&eax, &edx, &ecx, &edx, NULL); \ >> + unsigned int eax = 1, ecx = 0; \ >> + cpuid(&eax, &eax, &ecx, &eax, NULL); \ >> if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ >> ecx = 0; \ >> (ecx & (1U << 28)) != 0; \ >> }) >> >> +#define cpu_has_avx2 ({ \ >> + unsigned int eax = 1, ebx, ecx = 0; \ >> + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ >> + if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ >> + ebx = 0; \ >> + else { \ >> + eax = 7, ecx = 0; \ >> + cpuid(&eax, &ebx, &ecx, &eax, NULL); \ >> + } \ >> + (ebx & (1U << 5)) != 0; \ >> +}) >> + >> int get_fpu( >> void (*exception_callback)(void *, struct cpu_user_regs *), >> void *exception_callback_arg, >> @@ -111,14 +123,14 @@ int get_fpu( >> { >> case X86EMUL_FPU_fpu: >> break; >> - case X86EMUL_FPU_ymm: >> - if ( cpu_has_avx ) >> + case X86EMUL_FPU_mmx: >> + if ( cpu_has_mmx ) >> break; >> case X86EMUL_FPU_xmm: >> if ( cpu_has_sse ) >> break; >> - case X86EMUL_FPU_mmx: >> - if ( cpu_has_mmx ) >> + case X86EMUL_FPU_ymm: >> + if ( cpu_has_avx ) >> break; >> default: >> return X86EMUL_UNHANDLEABLE; >> @@ -629,6 +641,73 @@ int main(int argc, char **argv) >> else >> printf("skipped\n"); >> >> + printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)..."); >> + if ( stack_exec && cpu_has_avx ) >> + { >> + extern const unsigned char vmovdqu_to_mem[]; >> + >> + asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n" >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n" >> + ".popsection" :: "c" (NULL) ); >> + >> + memcpy(instr, vmovdqu_to_mem, 15); >> + memset(res, 0x55, 128); >> + memset(res + 16, 0xff, 16); >> + memset(res + 20, 0x00, 16); >> + regs.eip = (unsigned long)&instr[0]; >> + regs.ecx = (unsigned long)res; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + printf("skipped\n"); >> + >> + printf("%-40s", "Testing vmovdqu (%edx),%ymm4..."); >> + if ( stack_exec && cpu_has_avx ) >> + { >> + extern const unsigned char vmovdqu_from_mem[]; >> + >> +#if 0 /* Don't use AVX2 instructions for now */ >> + asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n" >> +#else >> + asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t" >> + "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n" >> +#endif >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n" >> + ".popsection" :: "d" (NULL) ); >> + >> + memcpy(instr, vmovdqu_from_mem, 15); >> + memset(res + 4, 0xff, 16); >> + regs.eip = (unsigned long)&instr[0]; >> + regs.ecx = 0; >> + regs.edx = (unsigned long)res; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( rc != X86EMUL_OKAY ) >> + goto fail; >> +#if 0 /* Don't use AVX2 instructions for now */ >> + asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" >> + "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t" >> + "vpmovmskb %%ymm1, %0" : "=r" (rc) ); >> +#else >> + asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t" >> + "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t" >> + "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t" >> + "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t" >> + "vpmovmskb %%xmm0, %0\n\t" >> + "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) ); >> + rc |= i << 16; >> +#endif >> + if ( rc != 0xffffffff ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + printf("skipped\n"); >> + >> printf("%-40s", "Testing movsd %xmm5,(%ecx)..."); >> memset(res, 0x77, 64); >> memset(res + 10, 0x66, 8); >> @@ -683,6 +762,59 @@ int main(int argc, char **argv) >> else >> printf("skipped\n"); >> >> + printf("%-40s", "Testing vmovsd %xmm5,(%ecx)..."); >> + memset(res, 0x88, 64); >> + memset(res + 10, 0x77, 8); >> + if ( stack_exec && cpu_has_avx ) >> + { >> + extern const unsigned char vmovsd_to_mem[]; >> + >> + asm volatile ( "vbroadcastsd %0, %%ymm5\n" >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n" >> + ".popsection" :: "m" (res[10]), "c" (NULL) ); >> + >> + memcpy(instr, vmovsd_to_mem, 15); >> + regs.eip = (unsigned long)&instr[0]; >> + regs.ecx = (unsigned long)(res + 2); >> + regs.edx = 0; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + { >> + printf("skipped\n"); >> + memset(res + 2, 0x77, 8); >> + } >> + >> + printf("%-40s", "Testing vmovaps (%edx),%ymm7..."); >> + if ( stack_exec && cpu_has_avx ) >> + { >> + extern const unsigned char vmovaps_from_mem[]; >> + >> + asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n" >> + ".pushsection .test, \"a\", @progbits\n" >> + "vmovaps_from_mem: vmovaps (%0), %%ymm7\n" >> + ".popsection" :: "d" (NULL) ); >> + >> + memcpy(instr, vmovaps_from_mem, 15); >> + regs.eip = (unsigned long)&instr[0]; >> + regs.ecx = 0; >> + regs.edx = (unsigned long)res; >> + rc = x86_emulate(&ctxt, &emulops); >> + if ( rc != X86EMUL_OKAY ) >> + goto fail; >> + asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t" >> + "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) ); >> + if ( rc != 0xff ) >> + goto fail; >> + printf("okay\n"); >> + } >> + else >> + printf("skipped\n"); >> + >> for ( j = 1; j <= 2; j++ ) >> { >> #if defined(__i386__) >> --- a/xen/arch/x86/x86_emulate/x86_emulate.c >> +++ b/xen/arch/x86/x86_emulate/x86_emulate.c >> @@ -1454,10 +1454,10 @@ x86_emulate( >> /* VEX */ >> generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1); >> >> - vex.raw[0] = b; >> + vex.raw[0] = modrm; >> if ( b & 1 ) >> { >> - vex.raw[1] = b; >> + vex.raw[1] = modrm; >> vex.opcx = vex_0f; >> vex.x = 1; >> vex.b = 1; >> @@ -1479,10 +1479,7 @@ x86_emulate( >> } >> } >> } >> - vex.reg ^= 0xf; >> - if ( !mode_64bit() ) >> - vex.reg &= 0x7; >> - else if ( !vex.r ) >> + if ( mode_64bit() && !vex.r ) >> rex_prefix |= REX_R; >> >> fail_if(vex.opcx != vex_0f); >> @@ -3899,8 +3896,9 @@ x86_emulate( >> else >> { >> fail_if((vex.opcx != vex_0f) || >> - (vex.reg && ((ea.type == OP_MEM) || >> - !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); >> + ((vex.reg != 0xf) && >> + ((ea.type == OP_MEM) || >> + !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); >> vcpu_must_have_avx(); >> get_fpu(X86EMUL_FPU_ymm, &fic); >> ea.bytes = 16 << vex.l; >> @@ -4168,7 +4166,7 @@ x86_emulate( >> } >> else >> { >> - fail_if((vex.opcx != vex_0f) || vex.reg || >> + fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) || >> ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); >> vcpu_must_have_avx(); >> get_fpu(X86EMUL_FPU_ymm, &fic); >> >> >> >> >> _______________________________________________ >> Xen-devel mailing list >> Xen-devel@lists.xen.org >> http://lists.xen.org/xen-devel > > > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel --------------060703010004030202090808 Content-Type: text/html; charset="ISO-8859-1" Content-Transfer-Encoding: 7bit
On 28/08/13 09:49, Fabio Fantoni wrote:
Il 28/08/2013 10:32, Jan Beulich ha scritto:
- we used the C4/C5 (first prefix) byte instead of the apparent ModR/M
  one as the second prefix byte
- early decoding normalized vex.reg, thus corrupting it for the main
  consumer (copy_REX_VEX()), resulting in #UD on the two-operand
  instructions we emulate

Also add respective test cases to the testing utility plus
- fix get_fpu() (the fall-through order was inverted)
- add cpu_has_avx2, even if it's currently unused (as in the new test
  cases I decided to refrain from using AVX2 instructions in order to
  be able to actually run all the tests on the hardware I have)
- slightly tweak cpu_has_avx to more consistently express the outputs
  we don't care about (sinking them all into the same variable)

This patch include the solution for full SSE support needed to solve this problem?
http://bugs.xenproject.org/xen/bug/11

Thanks for any reply.

No it doesn't.  Bug #11 is a problem with the communication between Xen and Qemu.

This fixes some bugs with Xens emulation of instructions, irrespective of Qemu.

~Andrew


Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/tools/tests/x86_emulator/test_x86_emulator.c
+++ b/tools/tests/x86_emulator/test_x86_emulator.c
@@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x
 }
 
 #define cpu_has_avx ({ \
-    unsigned int eax = 1, ecx = 0, edx; \
-    cpuid(&eax, &edx, &ecx, &edx, NULL); \
+    unsigned int eax = 1, ecx = 0; \
+    cpuid(&eax, &eax, &ecx, &eax, NULL); \
     if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
         ecx = 0; \
     (ecx & (1U << 28)) != 0; \
 })
 
+#define cpu_has_avx2 ({ \
+    unsigned int eax = 1, ebx, ecx = 0; \
+    cpuid(&eax, &ebx, &ecx, &eax, NULL); \
+    if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \
+        ebx = 0; \
+    else { \
+        eax = 7, ecx = 0; \
+        cpuid(&eax, &ebx, &ecx, &eax, NULL); \
+    } \
+    (ebx & (1U << 5)) != 0; \
+})
+
 int get_fpu(
     void (*exception_callback)(void *, struct cpu_user_regs *),
     void *exception_callback_arg,
@@ -111,14 +123,14 @@ int get_fpu(
     {
     case X86EMUL_FPU_fpu:
         break;
-    case X86EMUL_FPU_ymm:
-        if ( cpu_has_avx )
+    case X86EMUL_FPU_mmx:
+        if ( cpu_has_mmx )
             break;
     case X86EMUL_FPU_xmm:
         if ( cpu_has_sse )
             break;
-    case X86EMUL_FPU_mmx:
-        if ( cpu_has_mmx )
+    case X86EMUL_FPU_ymm:
+        if ( cpu_has_avx )
             break;
     default:
         return X86EMUL_UNHANDLEABLE;
@@ -629,6 +641,73 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovdqu_to_mem[];
+
+        asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n"
+                       ".popsection" :: "c" (NULL) );
+
+        memcpy(instr, vmovdqu_to_mem, 15);
+        memset(res, 0x55, 128);
+        memset(res + 16, 0xff, 16);
+        memset(res + 20, 0x00, 16);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
+    printf("%-40s", "Testing vmovdqu (%edx),%ymm4...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovdqu_from_mem[];
+
+#if 0 /* Don't use AVX2 instructions for now */
+        asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n"
+#else
+        asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t"
+                       "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n"
+#endif
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n"
+                       ".popsection" :: "d" (NULL) );
+
+        memcpy(instr, vmovdqu_from_mem, 15);
+        memset(res + 4, 0xff, 16);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = 0;
+        regs.edx    = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY )
+            goto fail;
+#if 0 /* Don't use AVX2 instructions for now */
+        asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t"
+              "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t"
+              "vpmovmskb %%ymm1, %0" : "=r" (rc) );
+#else
+        asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t"
+              "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t"
+              "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t"
+              "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t"
+              "vpmovmskb %%xmm0, %0\n\t"
+              "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) );
+        rc |= i << 16;
+#endif
+        if ( rc != 0xffffffff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     printf("%-40s", "Testing movsd %xmm5,(%ecx)...");
     memset(res, 0x77, 64);
     memset(res + 10, 0x66, 8);
@@ -683,6 +762,59 @@ int main(int argc, char **argv)
     else
         printf("skipped\n");
 
+    printf("%-40s", "Testing vmovsd %xmm5,(%ecx)...");
+    memset(res, 0x88, 64);
+    memset(res + 10, 0x77, 8);
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovsd_to_mem[];
+
+        asm volatile ( "vbroadcastsd %0, %%ymm5\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n"
+                       ".popsection" :: "m" (res[10]), "c" (NULL) );
+
+        memcpy(instr, vmovsd_to_mem, 15);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = (unsigned long)(res + 2);
+        regs.edx    = 0;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+    {
+        printf("skipped\n");
+        memset(res + 2, 0x77, 8);
+    }
+
+    printf("%-40s", "Testing vmovaps (%edx),%ymm7...");
+    if ( stack_exec && cpu_has_avx )
+    {
+        extern const unsigned char vmovaps_from_mem[];
+
+        asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n"
+                       ".pushsection .test, \"a\", @progbits\n"
+                       "vmovaps_from_mem: vmovaps (%0), %%ymm7\n"
+                       ".popsection" :: "d" (NULL) );
+
+        memcpy(instr, vmovaps_from_mem, 15);
+        regs.eip    = (unsigned long)&instr[0];
+        regs.ecx    = 0;
+        regs.edx    = (unsigned long)res;
+        rc = x86_emulate(&ctxt, &emulops);
+        if ( rc != X86EMUL_OKAY )
+            goto fail;
+        asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t"
+              "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) );
+        if ( rc != 0xff )
+            goto fail;
+        printf("okay\n");
+    }
+    else
+        printf("skipped\n");
+
     for ( j = 1; j <= 2; j++ )
     {
 #if defined(__i386__)
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -1454,10 +1454,10 @@ x86_emulate(
                 /* VEX */
                 generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1);
 
-                vex.raw[0] = b;
+                vex.raw[0] = modrm;
                 if ( b & 1 )
                 {
-                    vex.raw[1] = b;
+                    vex.raw[1] = modrm;
                     vex.opcx = vex_0f;
                     vex.x = 1;
                     vex.b = 1;
@@ -1479,10 +1479,7 @@ x86_emulate(
                         }
                     }
                 }
-                vex.reg ^= 0xf;
-                if ( !mode_64bit() )
-                    vex.reg &= 0x7;
-                else if ( !vex.r )
+                if ( mode_64bit() && !vex.r )
                     rex_prefix |= REX_R;
 
                 fail_if(vex.opcx != vex_0f);
@@ -3899,8 +3896,9 @@ x86_emulate(
         else
         {
             fail_if((vex.opcx != vex_0f) ||
-                    (vex.reg && ((ea.type == OP_MEM) ||
-                                 !(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
+                    ((vex.reg != 0xf) &&
+                     ((ea.type == OP_MEM) ||
+                      !(vex.pfx & VEX_PREFIX_SCALAR_MASK))));
             vcpu_must_have_avx();
             get_fpu(X86EMUL_FPU_ymm, &fic);
             ea.bytes = 16 << vex.l;
@@ -4168,7 +4166,7 @@ x86_emulate(
         }
         else
         {
-            fail_if((vex.opcx != vex_0f) || vex.reg ||
+            fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) ||
                     ((vex.pfx != vex_66) && (vex.pfx != vex_f3)));
             vcpu_must_have_avx();
             get_fpu(X86EMUL_FPU_ymm, &fic);




_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

--------------060703010004030202090808-- --===============7726225049087649485== Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel --===============7726225049087649485==--