From: Julian Seward <jseward@acm.org>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
Date: Tue, 20 Jun 2006 11:54:40 +0100 [thread overview]
Message-ID: <200606201154.40985.jseward@acm.org> (raw)
The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
correctly, as shown by the attached program. It should print
cvttps2dq_1 ... ok
cvttps2dq_2 ... ok
movdq2q_1 ... ok
movq2dq_1 ... ok
but instead produces
cvttps2dq_1 ... ok
cvttps2dq_2 ... not ok
result0.sd[0] = 12 (expected 12)
result0.sd[1] = 3 (expected 56)
result0.sd[2] = -2147483648 (expected 43)
result0.sd[3] = 3 (expected 87)
movdq2q_1 ... not ok
result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
movq2dq_1 ... not ok
result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
result0.uq[1] = 6221254864647256184 (expected 0)
I looked at QEMU's instruction decoders for these, and compared them
to Valgrind's, but could not see what the problem was. The decode
logic looks OK. Maybe the problem is elsewhere.
J
-------------------------------------------------------------------
#include <math.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
/* One-byte operand, viewable as a plain char or as an unsigned char. */
typedef union
{
    char          sb[1];    /* byte lane as plain char */
    unsigned char ub[1];    /* byte lane as unsigned char */
} reg8_t;
/* Two-byte operand, viewable as bytes or as a single 16-bit word. */
typedef union
{
    char               sb[2];   /* bytes as plain char */
    unsigned char      ub[2];   /* bytes as unsigned char */
    short int          sw[1];   /* whole operand as a signed word */
    unsigned short int uw[1];   /* whole operand as an unsigned word */
} reg16_t;
/*
 * Four-byte operand, viewable as bytes, words, one "dword" or one float.
 *
 * NOTE(review): the sd/ud lanes are declared long, so they are 32 bits wide
 * only on targets where long is 32 bits; elsewhere the union is larger than
 * four bytes.
 */
typedef union
{
    char               sb[4];   /* bytes as plain char */
    unsigned char      ub[4];   /* bytes as unsigned char */
    short int          sw[2];   /* signed words */
    unsigned short int uw[2];   /* unsigned words */
    long               sd[1];   /* signed dword lane (see note above) */
    unsigned long      ud[1];   /* unsigned dword lane (see note above) */
    float              ps[1];   /* single-precision float lane */
} reg32_t;
/*
 * Eight-byte (MMX-sized) operand with byte, word, dword, qword, float and
 * double views; the type is aligned to 8 bytes.
 *
 * NOTE(review): the sd/ud lanes are declared long, so they are 32 bits wide
 * only on targets where long is 32 bits.
 */
typedef union
{
    char               sb[8];   /* bytes as plain char */
    unsigned char      ub[8];   /* bytes as unsigned char */
    short int          sw[4];   /* signed words */
    unsigned short int uw[4];   /* unsigned words */
    long               sd[2];   /* signed dword lanes (see note above) */
    unsigned long      ud[2];   /* unsigned dword lanes (see note above) */
    long long          sq[1];   /* signed quadword */
    unsigned long long uq[1];   /* unsigned quadword */
    float              ps[2];   /* packed single-precision floats */
    double             pd[1];   /* double-precision float */
} reg64_t __attribute__((aligned(8)));
/*
 * Sixteen-byte (XMM-sized) operand with byte, word, dword, qword, float and
 * double views; the type is aligned to 16 bytes so it can be used directly as
 * an aligned SSE memory operand.
 *
 * NOTE(review): the sd/ud lanes are declared long, so they are 32 bits wide
 * only on targets where long is 32 bits; elsewhere they do not line up with
 * the four 32-bit lanes of an XMM register.
 */
typedef union
{
    char               sb[16];  /* bytes as plain char */
    unsigned char      ub[16];  /* bytes as unsigned char */
    short int          sw[8];   /* signed words */
    unsigned short int uw[8];   /* unsigned words */
    long               sd[4];   /* signed dword lanes (see note above) */
    unsigned long      ud[4];   /* unsigned dword lanes (see note above) */
    long long          sq[2];   /* signed quadwords */
    unsigned long long uq[2];   /* unsigned quadwords */
    float              ps[4];   /* packed single-precision floats */
    double             pd[2];   /* packed double-precision floats */
} reg128_t __attribute__((aligned(16)));
/* Jump buffer each test arms with sigsetjmp() before running its asm. */
static sigjmp_buf catchpoint;

/*
 * Signal handler installed for SIGILL by main(): abandons the current test by
 * jumping back to catchpoint, which makes the pending sigsetjmp() return 1.
 */
static void handle_sigill(int signum)
{
    (void) signum;  /* the reaction does not depend on the signal number */
    siglongjmp(catchpoint, 1);
}
/*
 * Approximate equality for floats.
 *
 * Returns 1 when f1 and f2 compare equal, or when their distance is below
 * 1.5 * 2^-12 times |f1|; returns 0 otherwise. The tolerance is scaled by the
 * first argument only, so the comparison is not symmetric.
 */
__attribute__((unused))
static int eq_float(float f1, float f2)
{
    if (f1 == f2)
    {
        return 1;
    }

    return fabsf(f1 - f2) < fabsf(f1) * 1.5 * pow(2, -12);
}
/*
 * Approximate equality for doubles.
 *
 * Returns 1 when d1 and d2 compare equal, or when their distance is below
 * 1.5 * 2^-12 times |d1|; returns 0 otherwise. The tolerance is scaled by the
 * first argument only, so the comparison is not symmetric.
 */
__attribute__((unused))
static int eq_double(double d1, double d2)
{
    if (d1 == d2)
    {
        return 1;
    }

    return fabs(d1 - d2) < fabs(d1) * 1.5 * pow(2, -12);
}
static void cvttps2dq_1(void)
{
reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } };
reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } };
reg128_t result0;
char state[108];
if (sigsetjmp(catchpoint, 1) == 0)
{
asm(
"fsave %3\n"
"movlps 0%0, %%xmm4\n"
"movhps 8%0, %%xmm4\n"
"movlps 0%1, %%xmm5\n"
"movhps 8%1, %%xmm5\n"
"cvttps2dq %%xmm4, %%xmm5\n"
"movlps %%xmm5, 0%2\n"
"movhps %%xmm5, 8%2\n"
"frstor %3\n"
:
: "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0])
: "xmm4", "xmm5"
);
if (result0.sd[0] == 12L && result0.sd[1] == 56L && result0.sd[2] == 43L
&& result0.sd[3] == 87L )
{
printf("cvttps2dq_1 ... ok\n");
}
else
{
printf("cvttps2dq_1 ... not ok\n");
printf(" result0.sd[0] = %ld (expected %ld)\n", result0.sd[0], 12L);
printf(" result0.sd[1] = %ld (expected %ld)\n", result0.sd[1], 56L);
printf(" result0.sd[2] = %ld (expected %ld)\n", result0.sd[2], 43L);
printf(" result0.sd[3] = %ld (expected %ld)\n", result0.sd[3], 87L);
}
}
else
{
printf("cvttps2dq_1 ... failed\n");
}
return;
}
static void cvttps2dq_2(void)
{
reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } };
reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } };
reg128_t result0;
char state[108];
if (sigsetjmp(catchpoint, 1) == 0)
{
asm(
"fsave %3\n"
"movlps 0%1, %%xmm5\n"
"movhps 8%1, %%xmm5\n"
"cvttps2dq %0, %%xmm5\n"
"movlps %%xmm5, 0%2\n"
"movhps %%xmm5, 8%2\n"
"frstor %3\n"
:
: "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0])
: "xmm4", "xmm5"
);
if (result0.sd[0] == 12L && result0.sd[1] == 56L && result0.sd[2] == 43L
&& result0.sd[3] == 87L )
{
printf("cvttps2dq_2 ... ok\n");
}
else
{
printf("cvttps2dq_2 ... not ok\n");
printf(" result0.sd[0] = %ld (expected %ld)\n", result0.sd[0], 12L);
printf(" result0.sd[1] = %ld (expected %ld)\n", result0.sd[1], 56L);
printf(" result0.sd[2] = %ld (expected %ld)\n", result0.sd[2], 43L);
printf(" result0.sd[3] = %ld (expected %ld)\n", result0.sd[3], 87L);
}
}
else
{
printf("cvttps2dq_2 ... failed\n");
}
return;
}
static void movdq2q_1(void)
{
reg128_t arg0 = { .uq = { 0x012345678abcdefULL, 0xfedcba9876543210ULL } };
reg64_t arg1 = { .uq = { 0x1212121234343434ULL } };
reg64_t result0;
char state[108];
if (sigsetjmp(catchpoint, 1) == 0)
{
asm(
"fsave %3\n"
"movlps 0%0, %%xmm4\n"
"movhps 8%0, %%xmm4\n"
"movq %1, %%mm6\n"
"movdq2q %%xmm4, %%mm6\n"
"movq %%mm6, %2\n"
"frstor %3\n"
:
: "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0])
: "xmm4", "mm6"
);
if (result0.uq[0] == 0x012345678abcdefULL )
{
printf("movdq2q_1 ... ok\n");
}
else
{
printf("movdq2q_1 ... not ok\n");
printf(" result0.uq[0] = %llu (expected %llu)\n", result0.uq[0],
0x012345678abcdefULL);
}
}
else
{
printf("movdq2q_1 ... failed\n");
}
return;
}
static void movq2dq_1(void)
{
reg64_t arg0 = { .uq = { 0x012345678abcdefULL } };
reg128_t arg1 = { .uq = { 0x1212121234343434ULL, 0x5656565678787878ULL } };
reg128_t result0;
char state[108];
if (sigsetjmp(catchpoint, 1) == 0)
{
asm(
"fsave %3\n"
"movq %0, %%mm6\n"
"movlps 0%1, %%xmm4\n"
"movhps 8%1, %%xmm4\n"
"movq2dq %%mm6, %%xmm4\n"
"movlps %%xmm4, 0%2\n"
"movhps %%xmm4, 8%2\n"
"frstor %3\n"
:
: "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0])
: "mm6", "xmm4"
);
if (result0.uq[0] == 0x012345678abcdefULL && result0.uq[1] == 0ULL )
{
printf("movq2dq_1 ... ok\n");
}
else
{
printf("movq2dq_1 ... not ok\n");
printf(" result0.uq[0] = %llu (expected %llu)\n", result0.uq[0],
0x012345678abcdefULL);
printf(" result0.uq[1] = %llu (expected %llu)\n", result0.uq[1],
0ULL);
}
}
else
{
printf("movq2dq_1 ... failed\n");
}
return;
}
/*
 * Entry point: installs handle_sigill for SIGILL, runs every instruction test
 * in sequence and exits with status 0. Command-line arguments are ignored.
 */
int main(int argc, char **argv)
{
    (void) argc;
    (void) argv;

    signal(SIGILL, handle_sigill);

    cvttps2dq_1();
    cvttps2dq_2();
    movdq2q_1();
    movq2dq_1();

    return EXIT_SUCCESS;
}
next reply other threads:[~2006-06-20 10:54 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-06-20 10:54 Julian Seward [this message]
2006-06-20 11:29 ` [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour malc
2006-06-20 11:48 ` Julian Seward
2006-06-20 14:26 ` malc
2006-06-21 0:31 ` Julian Seward
2006-06-21 8:21 ` malc
2006-06-21 11:04 ` malc
2006-06-21 23:01 ` Julian Seward
2006-06-20 13:15 ` RE : " Sylvain Petreolle
2006-06-20 13:51 ` malc
2006-06-20 14:13 ` Julian Seward
2006-06-20 15:06 ` RE : " Sylvain Petreolle
2006-06-20 15:14 ` Guillaume POIRIER
2006-06-20 14:17 ` RE : " Jens Axboe
2006-06-20 14:23 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200606201154.40985.jseward@acm.org \
--to=jseward@acm.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).