qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
@ 2006-06-20 10:54 Julian Seward
  2006-06-20 11:29 ` malc
  2006-06-20 13:15 ` RE : " Sylvain Petreolle
  0 siblings, 2 replies; 15+ messages in thread
From: Julian Seward @ 2006-06-20 10:54 UTC (permalink / raw)
  To: qemu-devel


The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
correctly, as shown by the attached program.  It should print

  cvttps2dq_1 ... ok
  cvttps2dq_2 ... ok
  movdq2q_1 ... ok
  movq2dq_1 ... ok

but instead produces

  cvttps2dq_1 ... ok
  cvttps2dq_2 ... not ok
    result0.sd[0] = 12 (expected 12)
    result0.sd[1] = 3 (expected 56)
    result0.sd[2] = -2147483648 (expected 43)
    result0.sd[3] = 3 (expected 87)
  movdq2q_1 ... not ok
    result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
  movq2dq_1 ... not ok
    result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
    result0.uq[1] = 6221254864647256184 (expected 0)

I looked at QEMU's instruction decoders for these, and compared them
to Valgrind's, but could not see what the problem was.  The decode
logic looks OK.  Maybe the problem is elsewhere.

J

-------------------------------------------------------------------

#include <math.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

/* A one-byte value, accessible as plain char (sb) or unsigned char (ub). */
typedef union {
  char sb[1];
  unsigned char ub[1];
} reg8_t;

/* A two-byte value, accessible as chars (sb/ub) or as one short (sw/uw). */
typedef union {
  char sb[2];
  unsigned char ub[2];
  short sw[1];
  unsigned short uw[1];
} reg16_t;

/*
 * A value accessible as four chars, two shorts, one long (sd/ud) or one
 * float (ps).
 *
 * NOTE(review): sd/ud are declared as long.  Where long is 64 bits wide
 * (LP64) these members are larger than the other views and the union grows
 * beyond four bytes -- confirm which targets this is meant to build for.
 */
typedef union {
  char sb[4];
  unsigned char ub[4];
  short sw[2];
  unsigned short uw[2];
  long int sd[1];
  unsigned long int ud[1];
  float ps[1];
} reg32_t;

/*
 * A value accessible as eight chars, four shorts, two longs (sd/ud), one
 * long long (sq/uq), two floats (ps) or one double (pd).  The type is
 * declared with 8-byte alignment.
 *
 * NOTE(review): sd/ud are declared as long.  Where long is 64 bits wide
 * (LP64) sd[2]/ud[2] occupy 16 bytes, so they no longer line up with the
 * other views -- confirm which targets this is meant to build for.
 */
typedef union {
  char sb[8];
  unsigned char ub[8];
  short sw[4];
  unsigned short uw[4];
  long int sd[2];
  unsigned long int ud[2];
  long long int sq[1];
  unsigned long long int uq[1];
  float ps[2];
  double pd[1];
} reg64_t __attribute__ ((aligned (8)));

/*
 * A value accessible as sixteen chars, eight shorts, four longs (sd/ud),
 * two long longs (sq/uq), four floats (ps) or two doubles (pd).  The type
 * is declared with 16-byte alignment.
 *
 * NOTE(review): sd/ud are declared as long.  Where long is 64 bits wide
 * (LP64) sd[4]/ud[4] occupy 32 bytes, so sd[i] no longer lines up with
 * ps[i] -- confirm which targets this is meant to build for.
 */
typedef union {
  char sb[16];
  unsigned char ub[16];
  short sw[8];
  unsigned short uw[8];
  long int sd[4];
  unsigned long int ud[4];
  long long int sq[2];
  unsigned long long int uq[2];
  float ps[4];
  double pd[2];
} reg128_t __attribute__ ((aligned (16)));

/*
 * Jump buffer shared by all tests: each test arms it with sigsetjmp() before
 * running its asm, and handle_sigill() jumps back to it with siglongjmp().
 */
static sigjmp_buf catchpoint;

/*
 * Signal handler installed for SIGILL by main().  It does not return
 * normally: control is transferred back to the most recent
 * sigsetjmp(catchpoint, ...) call, which then returns 1.
 */
static void handle_sigill(int signum)
{
   (void) signum;   /* the signal number itself is not needed */

   siglongjmp(catchpoint, 1);
}

/*
 * Approximate equality for floats.
 *
 * Returns nonzero when f1 and f2 compare equal, or when the magnitude of
 * their difference is strictly below |f1| * 1.5 * 2^-12; returns 0
 * otherwise.  The tolerance is relative to f1 only.
 */
__attribute__((unused))
static int eq_float(float f1, float f2)
{
   double tolerance;

   if (f1 == f2)
   {
      return 1;
   }

   tolerance = fabsf(f1) * 1.5 * pow(2,-12);
   return fabsf(f1 - f2) < tolerance;
}

/*
 * Approximate equality for doubles.
 *
 * Returns nonzero when d1 and d2 compare equal, or when the magnitude of
 * their difference is strictly below |d1| * 1.5 * 2^-12; returns 0
 * otherwise.  The tolerance is relative to d1 only.
 */
__attribute__((unused))
static int eq_double(double d1, double d2)
{
   double tolerance;

   if (d1 == d2)
   {
      return 1;
   }

   tolerance = fabs(d1) * 1.5 * pow(2,-12);
   return fabs(d1 - d2) < tolerance;
}

static void cvttps2dq_1(void)
{
   reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } };
   reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } };
   reg128_t result0;
   char state[108];

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      asm(
         "fsave %3\n"
         "movlps 0%0, %%xmm4\n"
         "movhps 8%0, %%xmm4\n"
         "movlps 0%1, %%xmm5\n"
         "movhps 8%1, %%xmm5\n"
         "cvttps2dq %%xmm4, %%xmm5\n"
         "movlps %%xmm5, 0%2\n"
         "movhps %%xmm5, 8%2\n"
         "frstor %3\n"
         :
         : "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0])
         : "xmm4", "xmm5"
      );

      if (result0.sd[0] == 12L && result0.sd[1] == 56L && result0.sd[2] == 43L 
&& result0.sd[3] == 87L )
      {
         printf("cvttps2dq_1 ... ok\n");
      }
      else
      {
         printf("cvttps2dq_1 ... not ok\n");
         printf("  result0.sd[0] = %ld (expected %ld)\n", result0.sd[0], 12L);
         printf("  result0.sd[1] = %ld (expected %ld)\n", result0.sd[1], 56L);
         printf("  result0.sd[2] = %ld (expected %ld)\n", result0.sd[2], 43L);
         printf("  result0.sd[3] = %ld (expected %ld)\n", result0.sd[3], 87L);
      }
   }
   else
   {
      printf("cvttps2dq_1 ... failed\n");
   }

   return;
}

static void cvttps2dq_2(void)
{
   reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } };
   reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } };
   reg128_t result0;
   char state[108];

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      asm(
         "fsave %3\n"
         "movlps 0%1, %%xmm5\n"
         "movhps 8%1, %%xmm5\n"
         "cvttps2dq %0, %%xmm5\n"
         "movlps %%xmm5, 0%2\n"
         "movhps %%xmm5, 8%2\n"
         "frstor %3\n"
         :
         : "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0])
         : "xmm4", "xmm5"
      );

      if (result0.sd[0] == 12L && result0.sd[1] == 56L && result0.sd[2] == 43L 
&& result0.sd[3] == 87L )
      {
         printf("cvttps2dq_2 ... ok\n");
      }
      else
      {
         printf("cvttps2dq_2 ... not ok\n");
         printf("  result0.sd[0] = %ld (expected %ld)\n", result0.sd[0], 12L);
         printf("  result0.sd[1] = %ld (expected %ld)\n", result0.sd[1], 56L);
         printf("  result0.sd[2] = %ld (expected %ld)\n", result0.sd[2], 43L);
         printf("  result0.sd[3] = %ld (expected %ld)\n", result0.sd[3], 87L);
      }
   }
   else
   {
      printf("cvttps2dq_2 ... failed\n");
   }

   return;
}

/*
 * Test movdq2q (XMM register to MMX register).
 *
 * Loads arg0 into xmm4, preloads mm6 with arg1, executes
 * "movdq2q %xmm4, %mm6", stores mm6 to memory and checks that it equals the
 * low quadword of arg0 (0x012345678abcdef).
 *
 * Prints "movdq2q_1 ... ok", "... not ok" followed by the value read back,
 * or "... failed" when control comes back through siglongjmp(catchpoint, 1).
 */
static void movdq2q_1(void)
{
   reg128_t arg0 = { .uq = { 0x012345678abcdefULL, 0xfedcba9876543210ULL } };
   reg64_t arg1 = { .uq = { 0x1212121234343434ULL } };
   reg64_t result0;
   char state[108];   /* scratch area for the fsave/frstor image */

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      /* Everything the asm writes (the result and the fsave area) is an
         output operand, so the compiler cannot assume those objects are
         unchanged.  Each 8-byte half of arg0 is its own memory operand
         rather than a textual "8%0" offset, which only assembles to the
         intended address when the operand's displacement is negative or
         absent. */
      __asm__ __volatile__(
         "fsave %[fpu]\n"
         "movlps %[src_lo], %%xmm4\n"
         "movhps %[src_hi], %%xmm4\n"
         "movq %[dst_init], %%mm6\n"
         "movdq2q %%xmm4, %%mm6\n"
         "movq %%mm6, %[res]\n"
         "frstor %[fpu]\n"
         : [res] "=m" (result0.uq[0]), [fpu] "=m" (state)
         : [src_lo] "m" (arg0.uq[0]), [src_hi] "m" (arg0.uq[1]),
           [dst_init] "m" (arg1.uq[0])
         : "xmm4", "mm6"
      );

      if (result0.uq[0] == 0x012345678abcdefULL )
      {
         printf("movdq2q_1 ... ok\n");
      }
      else
      {
         printf("movdq2q_1 ... not ok\n");
         printf("  result0.uq[0] = %llu (expected %llu)\n", result0.uq[0],
                0x012345678abcdefULL);
      }
   }
   else
   {
      printf("movdq2q_1 ... failed\n");
   }

   return;
}

/*
 * Test movq2dq (MMX register to XMM register).
 *
 * Loads arg0 into mm6, preloads xmm4 with arg1, executes
 * "movq2dq %mm6, %xmm4", stores xmm4 to memory and checks that the low
 * quadword equals arg0 (0x012345678abcdef) and the high quadword is zero.
 *
 * Prints "movq2dq_1 ... ok", "... not ok" followed by both quadwords, or
 * "... failed" when control comes back through siglongjmp(catchpoint, 1).
 */
static void movq2dq_1(void)
{
   reg64_t arg0 = { .uq = { 0x012345678abcdefULL } };
   reg128_t arg1 = { .uq = { 0x1212121234343434ULL, 0x5656565678787878ULL } };
   reg128_t result0;
   char state[108];   /* scratch area for the fsave/frstor image */

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      /* Everything the asm writes (the result and the fsave area) is an
         output operand, so the compiler cannot assume those objects are
         unchanged.  Each 8-byte half is its own memory operand rather than
         a textual "8%0" offset, which only assembles to the intended address
         when the operand's displacement is negative or absent. */
      __asm__ __volatile__(
         "fsave %[fpu]\n"
         "movq %[src], %%mm6\n"
         "movlps %[dst_lo], %%xmm4\n"
         "movhps %[dst_hi], %%xmm4\n"
         "movq2dq %%mm6, %%xmm4\n"
         "movlps %%xmm4, %[res_lo]\n"
         "movhps %%xmm4, %[res_hi]\n"
         "frstor %[fpu]\n"
         : [res_lo] "=m" (result0.uq[0]), [res_hi] "=m" (result0.uq[1]),
           [fpu] "=m" (state)
         : [src] "m" (arg0.uq[0]),
           [dst_lo] "m" (arg1.uq[0]), [dst_hi] "m" (arg1.uq[1])
         : "mm6", "xmm4"
      );

      if (result0.uq[0] == 0x012345678abcdefULL && result0.uq[1] == 0ULL )
      {
         printf("movq2dq_1 ... ok\n");
      }
      else
      {
         printf("movq2dq_1 ... not ok\n");
         printf("  result0.uq[0] = %llu (expected %llu)\n", result0.uq[0],
                0x012345678abcdefULL);
         printf("  result0.uq[1] = %llu (expected %llu)\n", result0.uq[1],
                0ULL);
      }
   }
   else
   {
      printf("movq2dq_1 ... failed\n");
   }

   return;
}

int main(int argc, char **argv)
{
   signal(SIGILL, handle_sigill);

   cvttps2dq_1();
   cvttps2dq_2();
   movdq2q_1();
   movq2dq_1();

   exit(0);
}

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 10:54 [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour Julian Seward
@ 2006-06-20 11:29 ` malc
  2006-06-20 11:48   ` Julian Seward
  2006-06-20 13:15 ` RE : " Sylvain Petreolle
  1 sibling, 1 reply; 15+ messages in thread
From: malc @ 2006-06-20 11:29 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1313 bytes --]

On Tue, 20 Jun 2006, Julian Seward wrote:

>
> The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
> correctly, as shown by the attached program.  It should print
>
>  cvttps2dq_1 ... ok
>  cvttps2dq_2 ... ok
>  movdq2q_1 ... ok
>  movq2dq_1 ... ok
>
> but instead produces
>
>  cvttps2dq_1 ... ok
>  cvttps2dq_2 ... not ok
>    result0.sd[0] = 12 (expected 12)
>    result0.sd[1] = 3 (expected 56)
>    result0.sd[2] = -2147483648 (expected 43)
>    result0.sd[3] = 3 (expected 87)
>  movdq2q_1 ... not ok
>    result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
>  movq2dq_1 ... not ok
>    result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
>    result0.uq[1] = 6221254864647256184 (expected 0)
>
> I looked at QEMU's instruction decoders for these, and compared them
> to Valgrind's, but could not see what the problem was.  The decode
> logic looks OK.  Maybe the problem is elsewhere.

The signature of movdq2q is Pq, VRq and for movq2dq - Vo, PRq it appears
that translate.c gets it backwards, attached patch should deal with it.

As for cvttps2dq i ran it with interpreter which uses outdated(i.e. non
soft-float) conversion routines and it passed, so my guess would be that
this is float32_to_int32_round_to_zero vs (int32_t) cast issue.

--
mailto:malc@pulsesoft.com

[-- Attachment #2: Type: TEXT/PLAIN, Size: 1181 bytes --]

--- translate.c	Tue Jun 20 15:19:01 2006
+++ /mnt/big/npf/cvs/qemux/qemu/target-i386/translate.c	Tue Jun 20 15:19:20 2006
@@ -2947,15 +2947,15 @@
         case 0x2d6: /* movq2dq */
             gen_op_enter_mmx();
             rm = (modrm & 7) | REX_B(s);
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg & 7].XMM_Q(0)),
-                        offsetof(CPUX86State,fpregs[rm].mmx));
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg & 7].XMM_Q(1)));
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
+                        offsetof(CPUX86State,fpregs[reg & 7].mmx));
+            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
             break;
         case 0x3d6: /* movdq2q */
             gen_op_enter_mmx();
             rm = (modrm & 7);
-            gen_op_movq(offsetof(CPUX86State,fpregs[reg].mmx),
-                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+            gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
+                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
             break;
         case 0xd7: /* pmovmskb */
         case 0x1d7:

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 11:29 ` malc
@ 2006-06-20 11:48   ` Julian Seward
  2006-06-20 14:26     ` malc
  0 siblings, 1 reply; 15+ messages in thread
From: Julian Seward @ 2006-06-20 11:48 UTC (permalink / raw)
  To: qemu-devel

On Tuesday 20 June 2006 12:29, malc wrote:

> The signature of movdq2q is Pq, VRq and for movq2dq - Vo, PRq it appears
> that translate.c gets it backwards, attached patch should deal with it.

Cool.

> As for cvttps2dq i ran it with interpreter which uses outdated(i.e. non
> soft-float) conversion routines and it passed, so my guess would be that
> this is float32_to_int32_round_to_zero vs (int32_t) cast issue.

I had a feeling this is a garbage-in-memory (or regs, or somewhere)
problem.  Reason is that the wrong results kept changing as I cut
the full test program down to just the small one I posted.  Can you
try on a vanilla build of i386-softmmu from cvs?

J

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE : [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 10:54 [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour Julian Seward
  2006-06-20 11:29 ` malc
@ 2006-06-20 13:15 ` Sylvain Petreolle
  2006-06-20 13:51   ` malc
  1 sibling, 1 reply; 15+ messages in thread
From: Sylvain Petreolle @ 2006-06-20 13:15 UTC (permalink / raw)
  To: qemu-devel

--- Julian Seward <jseward@acm.org> a écrit :
> 
> The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
> correctly, as shown by the attached program.  It should print
> 
>   cvttps2dq_1 ... ok
>   cvttps2dq_2 ... ok
>   movdq2q_1 ... ok
>   movq2dq_1 ... ok
> 
> 

I tried your program on my linux station :
CPU: AMD Athlon(tm) XP 1600+ stepping 02

[syl@wine qemu]$ gcc --version
gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)

[syl@wine qemu]$ gcc -msse2 sse2test.c -o sse2test
[syl@wine qemu]$ ./sse2test
cvttps2dq_1 ... failed
cvttps2dq_2 ... failed
movdq2q_1 ... failed
movq2dq_1 ... failed

what am i doing wrong here ?

Kind regards,
Sylvain Petreolle (aka Usurp)
--- --- --- --- --- --- --- --- --- --- --- --- ---
Windows is proprietary
 Run your favorite apps with free ReactOS : http://www.reactos.org
Listen to free Music: http://www.jamendo.com

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: RE : [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 13:15 ` RE : " Sylvain Petreolle
@ 2006-06-20 13:51   ` malc
  2006-06-20 14:13     ` Julian Seward
  2006-06-20 14:17     ` RE : " Jens Axboe
  0 siblings, 2 replies; 15+ messages in thread
From: malc @ 2006-06-20 13:51 UTC (permalink / raw)
  To: spetreolle, qemu-devel

On Tue, 20 Jun 2006, Sylvain Petreolle wrote:

> --- Julian Seward <jseward@acm.org> a écrit :
>>
>> The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
>> correctly, as shown by the attached program.  It should print
>>
>>   cvttps2dq_1 ... ok
>>   cvttps2dq_2 ... ok
>>   movdq2q_1 ... ok
>>   movq2dq_1 ... ok
>>
>>
>
> I tried your program on my linux station :
> CPU: AMD Athlon(tm) XP 1600+ stepping 02
>
> [syl@wine qemu]$ gcc --version
> gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
>
> [syl@wine qemu]$ gcc -msse2 sse2test.c -o sse2test
> [syl@wine qemu]$ ./sse2test
> cvttps2dq_1 ... failed
> cvttps2dq_2 ... failed
> movdq2q_1 ... failed
> movq2dq_1 ... failed
>
> what am i doing wrong here ?

Running it on a CPU without SSE2, if I'm allowed to venture a guess.

--
mailto:malc@pulsesoft.com

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 13:51   ` malc
@ 2006-06-20 14:13     ` Julian Seward
  2006-06-20 15:06       ` RE : " Sylvain Petreolle
  2006-06-20 14:17     ` RE : " Jens Axboe
  1 sibling, 1 reply; 15+ messages in thread
From: Julian Seward @ 2006-06-20 14:13 UTC (permalink / raw)
  To: qemu-devel; +Cc: spetreolle


> > [syl@wine qemu]$ gcc -msse2 sse2test.c -o sse2test
> > [syl@wine qemu]$ ./sse2test
> > cvttps2dq_1 ... failed
> > cvttps2dq_2 ... failed
> > movdq2q_1 ... failed
> > movq2dq_1 ... failed
> >
> > what am i doing wrong here ?
>
> Running it on a CPU without SSE2, if i'm allowed to venture a gues.

Yup.  Try 'strace ./sse2test' and see if it gets SIGILLs thrown at it.

J

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: RE : [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 13:51   ` malc
  2006-06-20 14:13     ` Julian Seward
@ 2006-06-20 14:17     ` Jens Axboe
  2006-06-20 14:23       ` Jens Axboe
  1 sibling, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2006-06-20 14:17 UTC (permalink / raw)
  To: qemu-devel; +Cc: spetreolle

On Tue, Jun 20 2006, malc wrote:
> On Tue, 20 Jun 2006, Sylvain Petreolle wrote:
> 
> >--- Julian Seward <jseward@acm.org> a écrit :
> >>
> >>The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
> >>correctly, as shown by the attached program.  It should print
> >>
> >>  cvttps2dq_1 ... ok
> >>  cvttps2dq_2 ... ok
> >>  movdq2q_1 ... ok
> >>  movq2dq_1 ... ok
> >>
> >>
> >
> >I tried your program on my linux station :
> >CPU: AMD Athlon(tm) XP 1600+ stepping 02
> >
> >[syl@wine qemu]$ gcc --version
> >gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
> >
> >[syl@wine qemu]$ gcc -msse2 sse2test.c -o sse2test
> >[syl@wine qemu]$ ./sse2test
> >cvttps2dq_1 ... failed
> >cvttps2dq_2 ... failed
> >movdq2q_1 ... failed
> >movq2dq_1 ... failed
> >
> >what am i doing wrong here ?
> 
> Running it on a CPU without SSE2, if i'm allowed to venture a gues.

Doesn't work for me, either:

axboe@nelson:/home/axboe $ ./a
cvttps2dq_1 ... not ok
  result0.sd[0] = 0 (expected 12)
  result0.sd[1] = 0 (expected 56)
  result0.sd[2] = 0 (expected 43)
  result0.sd[3] = 0 (expected 87)
cvttps2dq_2 ... not ok
  result0.sd[0] = 0 (expected 12)
  result0.sd[1] = 0 (expected 56)
  result0.sd[2] = 0 (expected 43)
  result0.sd[3] = 0 (expected 87)
movdq2q_1 ... not ok
  result0.uq[0] = 240518168588 (expected 5124095577148911)
movq2dq_1 ... not ok
  result0.uq[0] = 0 (expected 5124095577148911)
  result0.uq[1] = 0 (expected 0)
axboe@nelson:/home/axboe $ ./a
Segmentation fault

Varies between the two. Compiling without -O2 makes the last two
succeed, the others still do not. This CPU has sse2.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: RE : [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 14:17     ` RE : " Jens Axboe
@ 2006-06-20 14:23       ` Jens Axboe
  0 siblings, 0 replies; 15+ messages in thread
From: Jens Axboe @ 2006-06-20 14:23 UTC (permalink / raw)
  To: qemu-devel; +Cc: spetreolle

On Tue, Jun 20 2006, Jens Axboe wrote:
> On Tue, Jun 20 2006, malc wrote:
> > On Tue, 20 Jun 2006, Sylvain Petreolle wrote:
> > 
> > >--- Julian Seward <jseward@acm.org> a écrit :
> > >>
> > >>The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
> > >>correctly, as shown by the attached program.  It should print
> > >>
> > >>  cvttps2dq_1 ... ok
> > >>  cvttps2dq_2 ... ok
> > >>  movdq2q_1 ... ok
> > >>  movq2dq_1 ... ok
> > >>
> > >>
> > >
> > >I tried your program on my linux station :
> > >CPU: AMD Athlon(tm) XP 1600+ stepping 02
> > >
> > >[syl@wine qemu]$ gcc --version
> > >gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
> > >
> > >[syl@wine qemu]$ gcc -msse2 sse2test.c -o sse2test
> > >[syl@wine qemu]$ ./sse2test
> > >cvttps2dq_1 ... failed
> > >cvttps2dq_2 ... failed
> > >movdq2q_1 ... failed
> > >movq2dq_1 ... failed
> > >
> > >what am i doing wrong here ?
> > 
> > Running it on a CPU without SSE2, if i'm allowed to venture a gues.
> 
> Doesn't work for me, either:
> 
> axboe@nelson:/home/axboe $ ./a
> cvttps2dq_1 ... not ok
>   result0.sd[0] = 0 (expected 12)
>   result0.sd[1] = 0 (expected 56)
>   result0.sd[2] = 0 (expected 43)
>   result0.sd[3] = 0 (expected 87)
> cvttps2dq_2 ... not ok
>   result0.sd[0] = 0 (expected 12)
>   result0.sd[1] = 0 (expected 56)
>   result0.sd[2] = 0 (expected 43)
>   result0.sd[3] = 0 (expected 87)
> movdq2q_1 ... not ok
>   result0.uq[0] = 240518168588 (expected 5124095577148911)
> movq2dq_1 ... not ok
>   result0.uq[0] = 0 (expected 5124095577148911)
>   result0.uq[1] = 0 (expected 0)
> axboe@nelson:/home/axboe $ ./a
> Segmentation fault
> 
> Varies between the two. Compiling without -O2 makes the last two
> suceed, the others still not. This CPU has sse2.

32-bit version works, as intended I guess.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 11:48   ` Julian Seward
@ 2006-06-20 14:26     ` malc
  2006-06-21  0:31       ` Julian Seward
  0 siblings, 1 reply; 15+ messages in thread
From: malc @ 2006-06-20 14:26 UTC (permalink / raw)
  To: qemu-devel

On Tue, 20 Jun 2006, Julian Seward wrote:

>> As for cvttps2dq i ran it with interpreter which uses outdated(i.e. non
>> soft-float) conversion routines and it passed, so my guess would be that
>> this is float32_to_int32_round_to_zero vs (int32_t) cast issue.
>
> I had a feeling this is a garbage-in-memory (or regs, or somewhere)
> problem.  Reason is that the wrong results kept changing as I cut
> the full test program down to just the small one I posted.  Can you
> try on a vanilla build of i386-softmmu from cvs?

soft-float was a red herring, translate.c is at fault here (interpreter
does not use it, hence behaved correctly)

translate.c:3009
if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
                 b == 0xc2)) {
     /* specific case for SSE single instructions */
     if (b1 == 2) {
         /* 32 bit access */
         gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
         gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
     } else {
         /* 64 bit access */
         gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_t0.XMM_D(0)));
     }
} else {
     gen_ldo_env_A0[s->mem_index >> 2](op2_offset);
}

cvttps2dq is 0x5b(b=0x5b) with repn prefix (b1=2) the above code is optimized
a bit more than it should have been, as it loads only 4 bytes into xmm_t0
instead of 16.

--
mailto:malc@pulsesoft.com

^ permalink raw reply	[flat|nested] 15+ messages in thread

* RE : Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 14:13     ` Julian Seward
@ 2006-06-20 15:06       ` Sylvain Petreolle
  2006-06-20 15:14         ` Guillaume POIRIER
  0 siblings, 1 reply; 15+ messages in thread
From: Sylvain Petreolle @ 2006-06-20 15:06 UTC (permalink / raw)
  To: qemu-devel


--- Julian Seward <jseward@acm.org> a écrit :
> >
> > Running it on a CPU without SSE2, if i'm allowed to venture a gues.
> 
> Yup.  Try 'strace ./sse2test' and see if it gets SIGILLs thrown at it.
> 
> J
> 
You are right, I get SIGILLs. 
Seems I was wrong thinking Athlon xp was able to support sse2.

--- SIGILL (Illegal instruction) @ 0 (0) ---
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
fstat64(1, {st_mode=S_IFREG|0644, st_size=1613, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f3e000
rt_sigprocmask(SIG_BLOCK, NULL, [], 8)  = 0
--- SIGILL (Illegal instruction) @ 0 (0) ---
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8)  = 0
--- SIGILL (Illegal instruction) @ 0 (0) ---
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8)  = 0
--- SIGILL (Illegal instruction) @ 0 (0) ---
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
write(1, "cvttps2dq_1 ... failed\ncvttps2dq"..., 88cvttps2dq_1 ... failed


Kind regards,
Sylvain Petreolle (aka Usurp)
--- --- --- --- --- --- --- --- --- --- --- --- ---

 Run your favorite Windows apps with free ReactOS : http://www.reactos.org
Listen to non-DRMised Music: http://www.jamendo.com


Linux is not as well stable as it is told to. The proof is, mine has restarted two years ago, on the occasion of a power cut.
- H. Eychenne

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: RE : Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 15:06       ` RE : " Sylvain Petreolle
@ 2006-06-20 15:14         ` Guillaume POIRIER
  0 siblings, 0 replies; 15+ messages in thread
From: Guillaume POIRIER @ 2006-06-20 15:14 UTC (permalink / raw)
  To: spetreolle, qemu-devel

Hi,

On 6/20/06, Sylvain Petreolle <spetreolle@yahoo.fr> wrote:
>
> --- Julian Seward <jseward@acm.org> a écrit :
> > >
> > > Running it on a CPU without SSE2, if i'm allowed to venture a gues.
> >
> > Yup.  Try 'strace ./sse2test' and see if it gets SIGILLs thrown at it.
> >
> > J
> >
> You are right, I get SIGILLs.
> Seems I was wrong thinking Athlon xp was able to support sse2.

Indeed. See here:
http://en.wikipedia.org/wiki/List_of_AMD_Athlon_XP_microprocessors#Athlon_XP_.22Palomino.22_.28Model_6.2C_180_nm.29

SSE2 began to be supported in AMD processors with K8/Hammer/Athlon 64

Guillaume
-- 
"Success consists of going from failure to failure without loss of enthusiasm."
 -- Winston Churchill

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-20 14:26     ` malc
@ 2006-06-21  0:31       ` Julian Seward
  2006-06-21  8:21         ` malc
  0 siblings, 1 reply; 15+ messages in thread
From: Julian Seward @ 2006-06-21  0:31 UTC (permalink / raw)
  To: qemu-devel


Malc, your sse-movq.patch works for me.  Thanks.

> soft-float was a red herring, translate.c is at fault here (interpreter
> does not use it, hence behaved correctly)
>
> translate.c:3009
> if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
>                  b == 0xc2)) {
>      /* specific case for SSE single instructions */
>      if (b1 == 2) {
>          /* 32 bit access */
>          gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
>          gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
>      } else {
>          /* 64 bit access */
>          gen_ldq_env_A0[s->mem_index >>
> 2](offsetof(CPUX86State,xmm_t0.XMM_D(0))); }
> } else {
>      gen_ldo_env_A0[s->mem_index >> 2](op2_offset);
> }
>
> cvttps2dq is 0x5b(b=0x5b) with repn prefix (b1=2) the above code is
> optimized a bit more than it should have been, as it loads only 4 bytes
> into xmm_t0 instead of 16.

Uh, fine, but I don't understand how/what to fix.  Can you advise?

J

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-21  0:31       ` Julian Seward
@ 2006-06-21  8:21         ` malc
  2006-06-21 11:04           ` malc
  0 siblings, 1 reply; 15+ messages in thread
From: malc @ 2006-06-21  8:21 UTC (permalink / raw)
  To: qemu-devel

On Wed, 21 Jun 2006, Julian Seward wrote:

>
> Malc, your sse-movq.patch works for me.  Thanks.
>
>> soft-float was a red herring, translate.c is at fault here (interpreter
>> does not use it, hence behaved correctly)
>>
>> translate.c:3009
>> if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
>>                  b == 0xc2)) {
>>      /* specific case for SSE single instructions */
>>      if (b1 == 2) {
>>          /* 32 bit access */
>>          gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
>>          gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
>>      } else {
>>          /* 64 bit access */
>>          gen_ldq_env_A0[s->mem_index >>
>> 2](offsetof(CPUX86State,xmm_t0.XMM_D(0))); }
>> } else {
>>      gen_ldo_env_A0[s->mem_index >> 2](op2_offset);
>> }
>>
>> cvttps2dq is 0x5b(b=0x5b) with repn prefix (b1=2) the above code is
>> optimized a bit more than it should have been, as it loads only 4 bytes
>> into xmm_t0 instead of 16.
>
> Uh, fine, but I don't understand how/what to fix.  Can you advise?

The following will fix the _specific_ case of cvttps2dq; ideally one
should go through all the [0x50..0x5f, 0xc2] opcodes with a repnz/repz
prefix and check whether the rules imposed by the above snippet apply.

--- /mnt/big/npf/cvs/qemux/qemu/target-i386/translate.c Tue Jun 20 15:19:20 2006
+++ translate.c Tue Jun 20 18:17:19 2006
@@ -3009,7 +3009,9 @@
                  if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
                                  b == 0xc2)) {
                      /* specific case for SSE single instructions */
-                    if (b1 == 2) {
+                    if (b1 == 2 && b == 0x5b) {
+                        gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
+                    } else if (b1 == 2) {
                          /* 32 bit access */
                          gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
                          gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));

-- 
mailto:malc@pulsesoft.com

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-21  8:21         ` malc
@ 2006-06-21 11:04           ` malc
  2006-06-21 23:01             ` Julian Seward
  0 siblings, 1 reply; 15+ messages in thread
From: malc @ 2006-06-21 11:04 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1056 bytes --]

On Wed, 21 Jun 2006, malc wrote:

> On Wed, 21 Jun 2006, Julian Seward wrote:
>
>> 
>> Malc, your sse-movq.patch works for me.  Thanks.
>> 
>>> soft-float was a red herring, translate.c is at fault here (interpreter
>>> does not use it, hence behaved correctly)

[..snip..]

>>> 
>>> cvttps2dq is 0x5b(b=0x5b) with repn prefix (b1=2) the above code is
>>> optimized a bit more than it should have been, as it loads only 4 bytes
>>> into xmm_t0 instead of 16.
>> 
>> Uh, fine, but I don't understand how/what to fix.  Can you advise?
>
> Following will fix the _specific_ case of cvttps2dq, ideally one
> should go through all the [0x50..0x5f, 0xc2] with (repnz,repz prefix)
> range and check wether the rules imposed by the above snippet apply.

[..snip..]

>

It appears that cvttps2dq is indeed the only exception in the range,
combined patch that fixes both movd?q2d?q and cvttps2dq is attached.

I don't have any kind of SSE on this machine, so I would appreciate it if
someone would run tests/test-i386 with the patch attached.

--
mailto:malc@pulsesoft.com

[-- Attachment #2: Type: TEXT/PLAIN, Size: 1994 bytes --]

Index: target-i386/translate.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-i386/translate.c,v
retrieving revision 1.57
diff -u -u -r1.57 translate.c
--- target-i386/translate.c	14 Jun 2006 14:29:34 -0000	1.57
+++ target-i386/translate.c	21 Jun 2006 11:01:47 -0000
@@ -2947,15 +2947,15 @@
         case 0x2d6: /* movq2dq */
             gen_op_enter_mmx();
             rm = (modrm & 7) | REX_B(s);
-            gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
-                        offsetof(CPUX86State,fpregs[reg & 7].mmx));
-            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+            gen_op_movq(offsetof(CPUX86State,xmm_regs[reg & 7].XMM_Q(0)),
+                        offsetof(CPUX86State,fpregs[rm].mmx));
+            gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg & 7].XMM_Q(1)));
             break;
         case 0x3d6: /* movdq2q */
             gen_op_enter_mmx();
             rm = (modrm & 7);
-            gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
-                        offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+            gen_op_movq(offsetof(CPUX86State,fpregs[reg].mmx),
+                        offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
             break;
         case 0xd7: /* pmovmskb */
         case 0x1d7:
@@ -3006,8 +3006,9 @@
             if (mod != 3) {
                 gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
-                if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
-                                b == 0xc2)) {
+                if (!(b1 == 2 && b == 0x5b) &&
+                    (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
+                                b == 0xc2))) {
                     /* specific case for SSE single instructions */
                     if (b1 == 2) {
                         /* 32 bit access */

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
  2006-06-21 11:04           ` malc
@ 2006-06-21 23:01             ` Julian Seward
  0 siblings, 0 replies; 15+ messages in thread
From: Julian Seward @ 2006-06-21 23:01 UTC (permalink / raw)
  To: qemu-devel


> It appears that cvttps2dq is indeed the only exception in the range,
> combined patch that fixes both movd?q2d?q and cvttps2dq is attached.
>
> I don't have any kind of SSE on this machine so would apprecaite if
> someone would run tests/test-i386 with the patch attached.

That works for me.  Thanks.  Valgrind's integer/x87/MMX/SSE/SSE2 tests 
now all pass on i386-softmmu.  I didn't try tests/test-i386 though.

Fabrice, can you commit this?

J

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2006-06-21 23:01 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-06-20 10:54 [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour Julian Seward
2006-06-20 11:29 ` malc
2006-06-20 11:48   ` Julian Seward
2006-06-20 14:26     ` malc
2006-06-21  0:31       ` Julian Seward
2006-06-21  8:21         ` malc
2006-06-21 11:04           ` malc
2006-06-21 23:01             ` Julian Seward
2006-06-20 13:15 ` RE : " Sylvain Petreolle
2006-06-20 13:51   ` malc
2006-06-20 14:13     ` Julian Seward
2006-06-20 15:06       ` RE : " Sylvain Petreolle
2006-06-20 15:14         ` Guillaume POIRIER
2006-06-20 14:17     ` RE : " Jens Axboe
2006-06-20 14:23       ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).