All of lore.kernel.org
 help / color / mirror / Atom feed
From: Julian Seward <jseward@acm.org>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
Date: Tue, 20 Jun 2006 11:54:40 +0100	[thread overview]
Message-ID: <200606201154.40985.jseward@acm.org> (raw)


The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
correctly, as shown by the attached program.  It should print

  cvttps2dq_1 ... ok
  cvttps2dq_2 ... ok
  movdq2q_1 ... ok
  movq2dq_1 ... ok

but instead produces

  cvttps2dq_1 ... ok
  cvttps2dq_2 ... not ok
    result0.sd[0] = 12 (expected 12)
    result0.sd[1] = 3 (expected 56)
    result0.sd[2] = -2147483648 (expected 43)
    result0.sd[3] = 3 (expected 87)
  movdq2q_1 ... not ok
    result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
  movq2dq_1 ... not ok
    result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
    result0.uq[1] = 6221254864647256184 (expected 0)

I looked at QEMU's instruction decoders for these, and compared them
to Valgrind's, but could not see what the problem was.  The decode
logic looks OK.  Maybe the problem is elsewhere.

J

-------------------------------------------------------------------

#include <math.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

/* A single byte, viewable as plain char (sb) or unsigned char (ub). */
typedef union {
  char sb[1];
  unsigned char ub[1];
} reg8_t;

/*
 * A two-byte value with overlapping views: two chars (sb/ub) or one
 * short (sw/uw).
 */
typedef union {
  char sb[2];
  unsigned char ub[2];
  short sw[1];
  unsigned short uw[1];
} reg16_t;

/*
 * A four-byte value with overlapping views: chars (sb/ub), shorts (sw/uw),
 * one long (sd/ud) or one float (ps).
 * NOTE(review): the element counts assume `long int` is 4 bytes (as on
 * i386); where long is 8 bytes the sd/ud views make the union larger.
 */
typedef union {
  char sb[4];
  unsigned char ub[4];
  short sw[2];
  unsigned short uw[2];
  long int sd[1];
  unsigned long int ud[1];
  float ps[1];
} reg32_t;

/*
 * An eight-byte value, 8-byte aligned, with overlapping views: chars
 * (sb/ub), shorts (sw/uw), longs (sd/ud), one long long (sq/uq), two
 * floats (ps) or one double (pd).
 * NOTE(review): sd[2]/ud[2] only fit in 8 bytes where `long int` is
 * 4 bytes (as on i386).
 */
typedef union {
  char sb[8];
  unsigned char ub[8];
  short sw[4];
  unsigned short uw[4];
  long int sd[2];
  unsigned long int ud[2];
  long long int sq[1];
  unsigned long long int uq[1];
  float ps[2];
  double pd[1];
} reg64_t __attribute__ ((aligned (8)));

/*
 * A sixteen-byte value, 16-byte aligned, with overlapping views: chars
 * (sb/ub), shorts (sw/uw), longs (sd/ud), long longs (sq/uq), four floats
 * (ps) or two doubles (pd).
 * NOTE(review): sd[4]/ud[4] only fit in 16 bytes where `long int` is
 * 4 bytes (as on i386).
 */
typedef union {
  char sb[16];
  unsigned char ub[16];
  short sw[8];
  unsigned short uw[8];
  long int sd[4];
  unsigned long int ud[4];
  long long int sq[2];
  unsigned long long int uq[2];
  float ps[4];
  double pd[2];
} reg128_t __attribute__ ((aligned (16)));

/*
 * Jump buffer shared by all tests: each test arms it with sigsetjmp() and
 * handle_sigill() jumps back to it with siglongjmp().
 */
static sigjmp_buf catchpoint;

/*
 * Signal handler: abandons whatever was executing and resumes at the most
 * recent sigsetjmp(catchpoint, ...), which then sees a return value of 1.
 * The signal number is not used.
 */
static void handle_sigill(int signum)
{
   (void) signum;

   siglongjmp(catchpoint, 1);
}

/*
 * Approximate float comparison.
 *
 * Returns 1 when f1 and f2 are exactly equal, or when their absolute
 * difference is strictly below |f1| * 1.5 * 2^-12; returns 0 otherwise.
 * The tolerance is scaled by f1 only, so the check is not symmetric.
 */
__attribute__((unused))
static int eq_float(float f1, float f2)
{
   double gap;
   double limit;

   if (f1 == f2)
      return 1;

   gap = fabsf(f1 - f2);
   /* 0x1p-12 is exactly 2^-12; multiplication order is kept left-to-right. */
   limit = fabsf(f1) * 1.5 * 0x1p-12;

   return gap < limit;
}

/*
 * Approximate double comparison.
 *
 * Returns 1 when d1 and d2 are exactly equal, or when their absolute
 * difference is strictly below |d1| * 1.5 * 2^-12; returns 0 otherwise.
 * The tolerance is scaled by d1 only, so the check is not symmetric.
 */
__attribute__((unused))
static int eq_double(double d1, double d2)
{
   double gap;
   double limit;

   if (d1 == d2)
      return 1;

   gap = fabs(d1 - d2);
   /* 0x1p-12 is exactly 2^-12; multiplication order is kept left-to-right. */
   limit = fabs(d1) * 1.5 * 0x1p-12;

   return gap < limit;
}

/*
 * Test cvttps2dq with a register source operand.
 *
 * Loads four floats into xmm4 and four dummy integers into xmm5, executes
 * "cvttps2dq %xmm4, %xmm5", stores xmm5 to result0 and checks that each
 * lane holds the truncated float (12, 56, 43, 87).  Prints
 * "cvttps2dq_1 ... ok" or "... not ok" plus the per-lane values.  If
 * control comes back through catchpoint (handle_sigill() does that),
 * prints "cvttps2dq_1 ... failed".
 *
 * NOTE(review): the sd[] comparisons assume `long int` is 4 bytes, as on
 * i386; see reg128_t.
 */
static void cvttps2dq_1(void)
{
   reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } };
   reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } };
   reg128_t result0;
   char state[108];   /* buffer for the fsave/frstor image */

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      /*
       * Offsets are written as "8+%[x]" rather than "8%[x]": the operand
       * text may already start with a displacement (e.g. "16(%esp)"), and
       * plain concatenation would then yield "816(%esp)".
       * result0 is an output operand so the compiler knows the asm writes
       * it; the "memory" clobber covers the fsave write into state[].
       */
      __asm__ __volatile__(
         "fsave %[fpu]\n"
         "movlps %[src], %%xmm4\n"
         "movhps 8+%[src], %%xmm4\n"
         "movlps %[dst], %%xmm5\n"
         "movhps 8+%[dst], %%xmm5\n"
         "cvttps2dq %%xmm4, %%xmm5\n"
         "movlps %%xmm5, %[res]\n"
         "movhps %%xmm5, 8+%[res]\n"
         "frstor %[fpu]\n"
         : [res] "=m" (result0)
         : [src] "m" (arg0), [dst] "m" (arg1), [fpu] "m" (state[0])
         : "xmm4", "xmm5", "memory"
      );

      if (result0.sd[0] == 12L && result0.sd[1] == 56L &&
          result0.sd[2] == 43L && result0.sd[3] == 87L)
      {
         printf("cvttps2dq_1 ... ok\n");
      }
      else
      {
         printf("cvttps2dq_1 ... not ok\n");
         printf("  result0.sd[0] = %ld (expected %ld)\n", result0.sd[0], 12L);
         printf("  result0.sd[1] = %ld (expected %ld)\n", result0.sd[1], 56L);
         printf("  result0.sd[2] = %ld (expected %ld)\n", result0.sd[2], 43L);
         printf("  result0.sd[3] = %ld (expected %ld)\n", result0.sd[3], 87L);
      }
   }
   else
   {
      printf("cvttps2dq_1 ... failed\n");
   }
}

/*
 * Test cvttps2dq with a memory source operand.
 *
 * Loads four dummy integers into xmm5, executes "cvttps2dq <arg0>, %xmm5"
 * reading the four floats directly from memory, stores xmm5 to result0 and
 * checks that each lane holds the truncated float (12, 56, 43, 87).
 * Prints "cvttps2dq_2 ... ok" or "... not ok" plus the per-lane values.
 * If control comes back through catchpoint (handle_sigill() does that),
 * prints "cvttps2dq_2 ... failed".
 *
 * NOTE(review): the sd[] comparisons assume `long int` is 4 bytes, as on
 * i386; see reg128_t.
 */
static void cvttps2dq_2(void)
{
   reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } };
   reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } };
   reg128_t result0;
   char state[108];   /* buffer for the fsave/frstor image */

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      /*
       * Offsets are written as "8+%[x]" rather than "8%[x]": the operand
       * text may already start with a displacement (e.g. "16(%esp)"), and
       * plain concatenation would then yield "816(%esp)".
       * result0 is an output operand so the compiler knows the asm writes
       * it; the "memory" clobber covers the fsave write into state[].
       */
      __asm__ __volatile__(
         "fsave %[fpu]\n"
         "movlps %[dst], %%xmm5\n"
         "movhps 8+%[dst], %%xmm5\n"
         "cvttps2dq %[src], %%xmm5\n"
         "movlps %%xmm5, %[res]\n"
         "movhps %%xmm5, 8+%[res]\n"
         "frstor %[fpu]\n"
         : [res] "=m" (result0)
         : [src] "m" (arg0), [dst] "m" (arg1), [fpu] "m" (state[0])
         : "xmm5", "memory"
      );

      if (result0.sd[0] == 12L && result0.sd[1] == 56L &&
          result0.sd[2] == 43L && result0.sd[3] == 87L)
      {
         printf("cvttps2dq_2 ... ok\n");
      }
      else
      {
         printf("cvttps2dq_2 ... not ok\n");
         printf("  result0.sd[0] = %ld (expected %ld)\n", result0.sd[0], 12L);
         printf("  result0.sd[1] = %ld (expected %ld)\n", result0.sd[1], 56L);
         printf("  result0.sd[2] = %ld (expected %ld)\n", result0.sd[2], 43L);
         printf("  result0.sd[3] = %ld (expected %ld)\n", result0.sd[3], 87L);
      }
   }
   else
   {
      printf("cvttps2dq_2 ... failed\n");
   }
}

/*
 * Test movdq2q (copy the low quadword of an XMM register to an MMX
 * register).
 *
 * Loads arg0 into xmm4 and a dummy value into mm6, executes
 * "movdq2q %xmm4, %mm6", stores mm6 to result0 and checks that it equals
 * the low quadword of arg0.  Prints "movdq2q_1 ... ok" or "... not ok"
 * plus the value.  If control comes back through catchpoint
 * (handle_sigill() does that), prints "movdq2q_1 ... failed".
 */
static void movdq2q_1(void)
{
   reg128_t arg0 = { .uq = { 0x012345678abcdefULL, 0xfedcba9876543210ULL } };
   reg64_t arg1 = { .uq = { 0x1212121234343434ULL } };
   reg64_t result0;
   char state[108];   /* buffer for the fsave/frstor image */

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      /*
       * The offset is written as "8+%[x]" rather than "8%[x]": the operand
       * text may already start with a displacement (e.g. "16(%esp)"), and
       * plain concatenation would then yield "816(%esp)".
       * result0 is an output operand so the compiler knows the asm writes
       * it; the "memory" clobber covers the fsave write into state[].
       * fsave/frstor bracket the MMX use so the x87/MMX state is put back.
       */
      __asm__ __volatile__(
         "fsave %[fpu]\n"
         "movlps %[src], %%xmm4\n"
         "movhps 8+%[src], %%xmm4\n"
         "movq %[dst], %%mm6\n"
         "movdq2q %%xmm4, %%mm6\n"
         "movq %%mm6, %[res]\n"
         "frstor %[fpu]\n"
         : [res] "=m" (result0)
         : [src] "m" (arg0), [dst] "m" (arg1), [fpu] "m" (state[0])
         : "xmm4", "mm6", "memory"
      );

      if (result0.uq[0] == 0x012345678abcdefULL)
      {
         printf("movdq2q_1 ... ok\n");
      }
      else
      {
         printf("movdq2q_1 ... not ok\n");
         printf("  result0.uq[0] = %llu (expected %llu)\n", result0.uq[0],
                0x012345678abcdefULL);
      }
   }
   else
   {
      printf("movdq2q_1 ... failed\n");
   }
}

/*
 * Test movq2dq (copy an MMX register into the low quadword of an XMM
 * register, zeroing the high quadword).
 *
 * Loads arg0 into mm6 and a dummy value into xmm4, executes
 * "movq2dq %mm6, %xmm4", stores xmm4 to result0 and checks that the low
 * quadword equals arg0 and the high quadword is 0.  Prints
 * "movq2dq_1 ... ok" or "... not ok" plus the values.  If control comes
 * back through catchpoint (handle_sigill() does that), prints
 * "movq2dq_1 ... failed".
 */
static void movq2dq_1(void)
{
   reg64_t arg0 = { .uq = { 0x012345678abcdefULL } };
   reg128_t arg1 = { .uq = { 0x1212121234343434ULL, 0x5656565678787878ULL } };
   reg128_t result0;
   char state[108];   /* buffer for the fsave/frstor image */

   if (sigsetjmp(catchpoint, 1) == 0)
   {
      /*
       * Offsets are written as "8+%[x]" rather than "8%[x]": the operand
       * text may already start with a displacement (e.g. "16(%esp)"), and
       * plain concatenation would then yield "816(%esp)".
       * result0 is an output operand so the compiler knows the asm writes
       * it; the "memory" clobber covers the fsave write into state[].
       * fsave/frstor bracket the MMX use so the x87/MMX state is put back.
       */
      __asm__ __volatile__(
         "fsave %[fpu]\n"
         "movq %[src], %%mm6\n"
         "movlps %[dst], %%xmm4\n"
         "movhps 8+%[dst], %%xmm4\n"
         "movq2dq %%mm6, %%xmm4\n"
         "movlps %%xmm4, %[res]\n"
         "movhps %%xmm4, 8+%[res]\n"
         "frstor %[fpu]\n"
         : [res] "=m" (result0)
         : [src] "m" (arg0), [dst] "m" (arg1), [fpu] "m" (state[0])
         : "mm6", "xmm4", "memory"
      );

      if (result0.uq[0] == 0x012345678abcdefULL && result0.uq[1] == 0ULL)
      {
         printf("movq2dq_1 ... ok\n");
      }
      else
      {
         printf("movq2dq_1 ... not ok\n");
         printf("  result0.uq[0] = %llu (expected %llu)\n", result0.uq[0],
                0x012345678abcdefULL);
         printf("  result0.uq[1] = %llu (expected %llu)\n", result0.uq[1],
                0ULL);
      }
   }
   else
   {
      printf("movq2dq_1 ... failed\n");
   }
}

int main(int argc, char **argv)
{
   signal(SIGILL, handle_sigill);

   cvttps2dq_1();
   cvttps2dq_2();
   movdq2q_1();
   movq2dq_1();

   exit(0);
}

             reply	other threads:[~2006-06-20 10:54 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-06-20 10:54 Julian Seward [this message]
2006-06-20 11:29 ` [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour malc
2006-06-20 11:48   ` Julian Seward
2006-06-20 14:26     ` malc
2006-06-21  0:31       ` Julian Seward
2006-06-21  8:21         ` malc
2006-06-21 11:04           ` malc
2006-06-21 23:01             ` Julian Seward
2006-06-20 13:15 ` RE : " Sylvain Petreolle
2006-06-20 13:51   ` malc
2006-06-20 14:13     ` Julian Seward
2006-06-20 15:06       ` RE : " Sylvain Petreolle
2006-06-20 15:14         ` Guillaume POIRIER
2006-06-20 14:17     ` RE : " Jens Axboe
2006-06-20 14:23       ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200606201154.40985.jseward@acm.org \
    --to=jseward@acm.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.