* [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
@ 2006-06-20 10:54 Julian Seward
2006-06-20 11:29 ` malc
2006-06-20 13:15 ` RE : " Sylvain Petreolle
0 siblings, 2 replies; 15+ messages in thread
From: Julian Seward @ 2006-06-20 10:54 UTC (permalink / raw)
To: qemu-devel
The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
correctly, as shown by the attached program. It should print
cvttps2dq_1 ... ok
cvttps2dq_2 ... ok
movdq2q_1 ... ok
movq2dq_1 ... ok
but instead produces
cvttps2dq_1 ... ok
cvttps2dq_2 ... not ok
result0.sd[0] = 12 (expected 12)
result0.sd[1] = 3 (expected 56)
result0.sd[2] = -2147483648 (expected 43)
result0.sd[3] = 3 (expected 87)
movdq2q_1 ... not ok
result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
movq2dq_1 ... not ok
result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
result0.uq[1] = 6221254864647256184 (expected 0)
I looked at QEMU's instruction decoders for these, and compared them
to Valgrind's, but could not see what the problem was. The decode
logic looks OK. Maybe the problem is elsewhere.
J
-------------------------------------------------------------------
#include <math.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
/* One byte of test data, viewable as a signed or an unsigned byte. */
typedef union {
    char          sb[1];   /* plain-char view */
    unsigned char ub[1];   /* unsigned byte view */
} reg8_t;
/* Two bytes of test data, viewable as bytes or as one 16-bit word. */
typedef union {
    char           sb[2];   /* plain-char bytes */
    unsigned char  ub[2];   /* unsigned bytes */
    short          sw[1];   /* signed word */
    unsigned short uw[1];   /* unsigned word */
} reg16_t;
/*
 * Test data viewable as bytes, 16-bit words, a `long`, or one float.
 *
 * NOTE(review): the sd/ud views use `long`; on targets where `long` is
 * wider than 32 bits this union is larger than 4 bytes.
 */
typedef union {
    char           sb[4];   /* plain-char bytes */
    unsigned char  ub[4];   /* unsigned bytes */
    short          sw[2];   /* signed words */
    unsigned short uw[2];   /* unsigned words */
    long           sd[1];   /* signed `long` view */
    unsigned long  ud[1];   /* unsigned `long` view */
    float          ps[1];   /* single-precision float */
} reg32_t;
/*
 * 8-byte-aligned test data with byte, word, `long`, 64-bit integer,
 * float and double views.
 *
 * NOTE(review): the sd/ud views use `long`; on targets where `long` is
 * wider than 32 bits this union is larger than 8 bytes.
 */
typedef union {
    char               sb[8];   /* plain-char bytes */
    unsigned char      ub[8];   /* unsigned bytes */
    short              sw[4];   /* signed words */
    unsigned short     uw[4];   /* unsigned words */
    long               sd[2];   /* signed `long` view */
    unsigned long      ud[2];   /* unsigned `long` view */
    long long          sq[1];   /* signed 64-bit view */
    unsigned long long uq[1];   /* unsigned 64-bit view */
    float              ps[2];   /* single-precision floats */
    double             pd[1];   /* double-precision float */
} reg64_t __attribute__((aligned(8)));
/*
 * 16-byte-aligned test data with byte, word, `long`, 64-bit integer,
 * float and double views.
 *
 * NOTE(review): the sd/ud views use `long`; on targets where `long` is
 * wider than 32 bits this union is larger than 16 bytes and sd[i] no
 * longer lines up with ps[i].
 */
typedef union {
    char               sb[16];  /* plain-char bytes */
    unsigned char      ub[16];  /* unsigned bytes */
    short              sw[8];   /* signed words */
    unsigned short     uw[8];   /* unsigned words */
    long               sd[4];   /* signed `long` view */
    unsigned long      ud[4];   /* unsigned `long` view */
    long long          sq[2];   /* signed 64-bit view */
    unsigned long long uq[2];   /* unsigned 64-bit view */
    float              ps[4];   /* single-precision floats */
    double             pd[2];   /* double-precision floats */
} reg128_t __attribute__((aligned(16)));
/* Jump buffer armed by each test with sigsetjmp(); handle_sigill() jumps back to it. */
static sigjmp_buf catchpoint;
/*
 * Signal handler installed for SIGILL by main().
 *
 * Does not return normally: control resumes at the most recent
 * sigsetjmp(catchpoint, ...) call, which then yields 1.
 */
static void handle_sigill(int signum)
{
    (void) signum;   /* the signal number is not needed */
    siglongjmp(catchpoint, 1);
}
/*
 * Approximate equality for single-precision values.
 *
 * Returns 1 when f1 and f2 compare equal, or when they differ by less
 * than 1.5 * 2^-12 times the magnitude of f1; returns 0 otherwise.
 * The tolerance is relative to f1 only, so the test is not symmetric.
 */
__attribute__((unused))
static int eq_float(float f1, float f2)
{
    if (f1 == f2) {
        return 1;
    }

    return fabsf(f1 - f2) < fabsf(f1) * 1.5 * pow(2, -12);
}
/*
 * Approximate equality for double-precision values.
 *
 * Returns 1 when d1 and d2 compare equal, or when they differ by less
 * than 1.5 * 2^-12 times the magnitude of d1; returns 0 otherwise.
 * The tolerance is relative to d1 only, so the test is not symmetric.
 */
__attribute__((unused))
static int eq_double(double d1, double d2)
{
    if (d1 == d2) {
        return 1;
    }

    return fabs(d1 - d2) < fabs(d1) * 1.5 * pow(2, -12);
}
/*
 * Test cvttps2dq with a register source operand (xmm4 -> xmm5).
 *
 * Loads four floats into xmm4, converts them with truncation into xmm5,
 * stores xmm5 to memory and compares the four 32-bit results with
 * 12, 56, 43 and 87.  Prints "ok", "not ok" (with the observed values),
 * or "failed" when the SIGILL handler jumped back to catchpoint.
 */
static void cvttps2dq_1(void)
{
    reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } };
    reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } };
    /*
     * The instruction produces four 32-bit lanes.  Read them through an
     * `int` view so the check is also right where `long` (the type of
     * reg128_t.sd) is wider than 32 bits.
     */
    union {
        reg128_t reg;
        int lane[4];
    } result0;
    /*
     * FSAVE image (108 bytes in the 32-bit format).  Wrapped in a struct
     * so that a single memory operand covers the whole buffer.
     */
    struct {
        char bytes[108];
    } state;

    if (sigsetjmp(catchpoint, 1) != 0) {
        printf("cvttps2dq_1 ... failed\n");
        return;
    }

    /*
     * result0 and state are written by the asm, so they must be output
     * operands; declared as inputs, the optimizer may assume they are
     * never modified.  "0%[x]" / "8%[x]" prepend a displacement to the
     * printed memory operand to address its low and high 8 bytes.
     */
    __asm__ __volatile__(
        "fsave %[fpu]\n"
        "movlps 0%[src], %%xmm4\n"
        "movhps 8%[src], %%xmm4\n"
        "movlps 0%[dst], %%xmm5\n"
        "movhps 8%[dst], %%xmm5\n"
        "cvttps2dq %%xmm4, %%xmm5\n"
        "movlps %%xmm5, 0%[res]\n"
        "movhps %%xmm5, 8%[res]\n"
        "frstor %[fpu]\n"
        : [res] "=m" (result0.reg), [fpu] "=m" (state)
        : [src] "m" (arg0), [dst] "m" (arg1)
        : "xmm4", "xmm5"
    );

    if (result0.lane[0] == 12 && result0.lane[1] == 56
        && result0.lane[2] == 43 && result0.lane[3] == 87) {
        printf("cvttps2dq_1 ... ok\n");
    } else {
        printf("cvttps2dq_1 ... not ok\n");
        printf(" result0.sd[0] = %ld (expected %ld)\n", (long) result0.lane[0], 12L);
        printf(" result0.sd[1] = %ld (expected %ld)\n", (long) result0.lane[1], 56L);
        printf(" result0.sd[2] = %ld (expected %ld)\n", (long) result0.lane[2], 43L);
        printf(" result0.sd[3] = %ld (expected %ld)\n", (long) result0.lane[3], 87L);
    }
}
/*
 * Test cvttps2dq with a memory source operand (mem -> xmm5).
 *
 * Converts the four floats in arg0 directly from memory into xmm5,
 * stores xmm5 to memory and compares the four 32-bit results with
 * 12, 56, 43 and 87.  Prints "ok", "not ok" (with the observed values),
 * or "failed" when the SIGILL handler jumped back to catchpoint.
 */
static void cvttps2dq_2(void)
{
    reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } };
    reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } };
    /*
     * The instruction produces four 32-bit lanes.  Read them through an
     * `int` view so the check is also right where `long` (the type of
     * reg128_t.sd) is wider than 32 bits.
     */
    union {
        reg128_t reg;
        int lane[4];
    } result0;
    /*
     * FSAVE image (108 bytes in the 32-bit format).  Wrapped in a struct
     * so that a single memory operand covers the whole buffer.
     */
    struct {
        char bytes[108];
    } state;

    if (sigsetjmp(catchpoint, 1) != 0) {
        printf("cvttps2dq_2 ... failed\n");
        return;
    }

    /*
     * result0 and state are written by the asm, so they must be output
     * operands; declared as inputs, the optimizer may assume they are
     * never modified.  "0%[x]" / "8%[x]" prepend a displacement to the
     * printed memory operand to address its low and high 8 bytes.
     */
    __asm__ __volatile__(
        "fsave %[fpu]\n"
        "movlps 0%[dst], %%xmm5\n"
        "movhps 8%[dst], %%xmm5\n"
        "cvttps2dq %[src], %%xmm5\n"
        "movlps %%xmm5, 0%[res]\n"
        "movhps %%xmm5, 8%[res]\n"
        "frstor %[fpu]\n"
        : [res] "=m" (result0.reg), [fpu] "=m" (state)
        : [src] "m" (arg0), [dst] "m" (arg1)
        : "xmm5"
    );

    if (result0.lane[0] == 12 && result0.lane[1] == 56
        && result0.lane[2] == 43 && result0.lane[3] == 87) {
        printf("cvttps2dq_2 ... ok\n");
    } else {
        printf("cvttps2dq_2 ... not ok\n");
        printf(" result0.sd[0] = %ld (expected %ld)\n", (long) result0.lane[0], 12L);
        printf(" result0.sd[1] = %ld (expected %ld)\n", (long) result0.lane[1], 56L);
        printf(" result0.sd[2] = %ld (expected %ld)\n", (long) result0.lane[2], 43L);
        printf(" result0.sd[3] = %ld (expected %ld)\n", (long) result0.lane[3], 87L);
    }
}
/*
 * Test movdq2q (xmm4 -> mm6).
 *
 * Loads arg0 into xmm4 and arg1 into mm6, executes movdq2q, stores mm6
 * to memory and expects the low quadword of arg0.  Prints "ok",
 * "not ok" (with the observed value), or "failed" when the SIGILL
 * handler jumped back to catchpoint.
 */
static void movdq2q_1(void)
{
    reg128_t arg0 = { .uq = { 0x012345678abcdefULL, 0xfedcba9876543210ULL } };
    reg64_t arg1 = { .uq = { 0x1212121234343434ULL } };
    reg64_t result0;
    /*
     * FSAVE image (108 bytes in the 32-bit format).  Wrapped in a struct
     * so that a single memory operand covers the whole buffer.
     */
    struct {
        char bytes[108];
    } state;

    if (sigsetjmp(catchpoint, 1) != 0) {
        printf("movdq2q_1 ... failed\n");
        return;
    }

    /*
     * result0 and state are written by the asm, so they must be output
     * operands; declared as inputs, the optimizer may assume they are
     * never modified.  "0%[x]" / "8%[x]" prepend a displacement to the
     * printed memory operand to address its low and high 8 bytes.
     */
    __asm__ __volatile__(
        "fsave %[fpu]\n"
        "movlps 0%[src], %%xmm4\n"
        "movhps 8%[src], %%xmm4\n"
        "movq %[dst], %%mm6\n"
        "movdq2q %%xmm4, %%mm6\n"
        "movq %%mm6, %[res]\n"
        "frstor %[fpu]\n"
        : [res] "=m" (result0), [fpu] "=m" (state)
        : [src] "m" (arg0), [dst] "m" (arg1)
        : "xmm4", "mm6"
    );

    if (result0.uq[0] == 0x012345678abcdefULL) {
        printf("movdq2q_1 ... ok\n");
    } else {
        printf("movdq2q_1 ... not ok\n");
        printf(" result0.uq[0] = %llu (expected %llu)\n", result0.uq[0],
               0x012345678abcdefULL);
    }
}
/*
 * Test movq2dq (mm6 -> xmm4).
 *
 * Loads arg0 into mm6 and arg1 into xmm4, executes movq2dq, stores xmm4
 * to memory and expects arg0 in the low quadword and zero in the high
 * quadword.  Prints "ok", "not ok" (with the observed values), or
 * "failed" when the SIGILL handler jumped back to catchpoint.
 */
static void movq2dq_1(void)
{
    reg64_t arg0 = { .uq = { 0x012345678abcdefULL } };
    reg128_t arg1 = { .uq = { 0x1212121234343434ULL, 0x5656565678787878ULL } };
    reg128_t result0;
    /*
     * FSAVE image (108 bytes in the 32-bit format).  Wrapped in a struct
     * so that a single memory operand covers the whole buffer.
     */
    struct {
        char bytes[108];
    } state;

    if (sigsetjmp(catchpoint, 1) != 0) {
        printf("movq2dq_1 ... failed\n");
        return;
    }

    /*
     * result0 and state are written by the asm, so they must be output
     * operands; declared as inputs, the optimizer may assume they are
     * never modified.  "0%[x]" / "8%[x]" prepend a displacement to the
     * printed memory operand to address its low and high 8 bytes.
     */
    __asm__ __volatile__(
        "fsave %[fpu]\n"
        "movq %[src], %%mm6\n"
        "movlps 0%[dst], %%xmm4\n"
        "movhps 8%[dst], %%xmm4\n"
        "movq2dq %%mm6, %%xmm4\n"
        "movlps %%xmm4, 0%[res]\n"
        "movhps %%xmm4, 8%[res]\n"
        "frstor %[fpu]\n"
        : [res] "=m" (result0), [fpu] "=m" (state)
        : [src] "m" (arg0), [dst] "m" (arg1)
        : "mm6", "xmm4"
    );

    if (result0.uq[0] == 0x012345678abcdefULL && result0.uq[1] == 0ULL) {
        printf("movq2dq_1 ... ok\n");
    } else {
        printf("movq2dq_1 ... not ok\n");
        printf(" result0.uq[0] = %llu (expected %llu)\n", result0.uq[0],
               0x012345678abcdefULL);
        printf(" result0.uq[1] = %llu (expected %llu)\n", result0.uq[1],
               0ULL);
    }
}
int main(int argc, char **argv)
{
signal(SIGILL, handle_sigill);
cvttps2dq_1();
cvttps2dq_2();
movdq2q_1();
movq2dq_1();
exit(0);
}
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-20 10:54 [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour Julian Seward
@ 2006-06-20 11:29 ` malc
2006-06-20 11:48 ` Julian Seward
2006-06-20 13:15 ` RE : " Sylvain Petreolle
1 sibling, 1 reply; 15+ messages in thread
From: malc @ 2006-06-20 11:29 UTC (permalink / raw)
To: qemu-devel
[-- Attachment #1: Type: TEXT/PLAIN, Size: 1313 bytes --]
On Tue, 20 Jun 2006, Julian Seward wrote:
>
> The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
> correctly, as shown by the attached program. It should print
>
> cvttps2dq_1 ... ok
> cvttps2dq_2 ... ok
> movdq2q_1 ... ok
> movq2dq_1 ... ok
>
> but instead produces
>
> cvttps2dq_1 ... ok
> cvttps2dq_2 ... not ok
> result0.sd[0] = 12 (expected 12)
> result0.sd[1] = 3 (expected 56)
> result0.sd[2] = -2147483648 (expected 43)
> result0.sd[3] = 3 (expected 87)
> movdq2q_1 ... not ok
> result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
> movq2dq_1 ... not ok
> result0.uq[0] = 1302123111658042420 (expected 5124095577148911)
> result0.uq[1] = 6221254864647256184 (expected 0)
>
> I looked at QEMU's instruction decoders for these, and compared them
> to Valgrind's, but could not see what the problem was. The decode
> logic looks OK. Maybe the problem is elsewhere.
The signature of movdq2q is Pq, VRq and that of movq2dq is Vo, PRq; it appears
that translate.c gets them backwards. The attached patch should deal with it.
As for cvttps2dq i ran it with interpreter which uses outdated(i.e. non
soft-float) conversion routines and it passed, so my guess would be that
this is float32_to_int32_round_to_zero vs (int32_t) cast issue.
--
mailto:malc@pulsesoft.com
[-- Attachment #2: Type: TEXT/PLAIN, Size: 1181 bytes --]
--- translate.c Tue Jun 20 15:19:01 2006
+++ /mnt/big/npf/cvs/qemux/qemu/target-i386/translate.c Tue Jun 20 15:19:20 2006
@@ -2947,15 +2947,15 @@
case 0x2d6: /* movq2dq */
gen_op_enter_mmx();
rm = (modrm & 7) | REX_B(s);
- gen_op_movq(offsetof(CPUX86State,xmm_regs[reg & 7].XMM_Q(0)),
- offsetof(CPUX86State,fpregs[rm].mmx));
- gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg & 7].XMM_Q(1)));
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
+ offsetof(CPUX86State,fpregs[reg & 7].mmx));
+ gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
break;
case 0x3d6: /* movdq2q */
gen_op_enter_mmx();
rm = (modrm & 7);
- gen_op_movq(offsetof(CPUX86State,fpregs[reg].mmx),
- offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
+ gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
+ offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
break;
case 0xd7: /* pmovmskb */
case 0x1d7:
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-20 11:29 ` malc
@ 2006-06-20 11:48 ` Julian Seward
2006-06-20 14:26 ` malc
0 siblings, 1 reply; 15+ messages in thread
From: Julian Seward @ 2006-06-20 11:48 UTC (permalink / raw)
To: qemu-devel
On Tuesday 20 June 2006 12:29, malc wrote:
> The signature of movdq2q is Pq, VRq and for movq2dq - Vo, PRq it appears
> that translate.c gets it backwards, attached patch should deal with it.
Cool.
> As for cvttps2dq i ran it with interpreter which uses outdated(i.e. non
> soft-float) conversion routines and it passed, so my guess would be that
> this is float32_to_int32_round_to_zero vs (int32_t) cast issue.
I had a feeling this is a garbage-in-memory (or regs, or somewhere)
problem. Reason is that the wrong results kept changing as I cut
the full test program down to just the small one I posted. Can you
try on a vanilla build of i386-softmmu from cvs?
J
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-20 11:48 ` Julian Seward
@ 2006-06-20 14:26 ` malc
2006-06-21 0:31 ` Julian Seward
0 siblings, 1 reply; 15+ messages in thread
From: malc @ 2006-06-20 14:26 UTC (permalink / raw)
To: qemu-devel
On Tue, 20 Jun 2006, Julian Seward wrote:
>> As for cvttps2dq i ran it with interpreter which uses outdated(i.e. non
>> soft-float) conversion routines and it passed, so my guess would be that
>> this is float32_to_int32_round_to_zero vs (int32_t) cast issue.
>
> I had a feeling this is a garbage-in-memory (or regs, or somewhere)
> problem. Reason is that the wrong results kept changing as I cut
> the full test program down to just the small one I posted. Can you
> try on a vanilla build of i386-softmmu from cvs?
soft-float was a red herring, translate.c is at fault here (interpreter
does not use it, hence behaved correctly)
translate.c:3009
if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
b == 0xc2)) {
/* specific case for SSE single instructions */
if (b1 == 2) {
/* 32 bit access */
gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
} else {
/* 64 bit access */
gen_ldq_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_t0.XMM_D(0)));
}
} else {
gen_ldo_env_A0[s->mem_index >> 2](op2_offset);
}
cvttps2dq is 0x5b(b=0x5b) with repn prefix (b1=2) the above code is optimized
a bit more than it should have been, as it loads only 4 bytes into xmm_t0
instead of 16.
--
mailto:malc@pulsesoft.com
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-20 14:26 ` malc
@ 2006-06-21 0:31 ` Julian Seward
2006-06-21 8:21 ` malc
0 siblings, 1 reply; 15+ messages in thread
From: Julian Seward @ 2006-06-21 0:31 UTC (permalink / raw)
To: qemu-devel
Malc, your sse-movq.patch works for me. Thanks.
> soft-float was a red herring, translate.c is at fault here (interpreter
> does not use it, hence behaved correctly)
>
> translate.c:3009
> if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
> b == 0xc2)) {
> /* specific case for SSE single instructions */
> if (b1 == 2) {
> /* 32 bit access */
> gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
> gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
> } else {
> /* 64 bit access */
> gen_ldq_env_A0[s->mem_index >>
> 2](offsetof(CPUX86State,xmm_t0.XMM_D(0))); }
> } else {
> gen_ldo_env_A0[s->mem_index >> 2](op2_offset);
> }
>
> cvttps2dq is 0x5b(b=0x5b) with repn prefix (b1=2) the above code is
> optimized a bit more than it should have been, as it loads only 4 bytes
> into xmm_t0 instead of 16.
Uh, fine, but I don't understand how/what to fix. Can you advise?
J
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-21 0:31 ` Julian Seward
@ 2006-06-21 8:21 ` malc
2006-06-21 11:04 ` malc
0 siblings, 1 reply; 15+ messages in thread
From: malc @ 2006-06-21 8:21 UTC (permalink / raw)
To: qemu-devel
On Wed, 21 Jun 2006, Julian Seward wrote:
>
> Malc, your sse-movq.patch works for me. Thanks.
>
>> soft-float was a red herring, translate.c is at fault here (interpreter
>> does not use it, hence behaved correctly)
>>
>> translate.c:3009
>> if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
>> b == 0xc2)) {
>> /* specific case for SSE single instructions */
>> if (b1 == 2) {
>> /* 32 bit access */
>> gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
>> gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
>> } else {
>> /* 64 bit access */
>> gen_ldq_env_A0[s->mem_index >>
>> 2](offsetof(CPUX86State,xmm_t0.XMM_D(0))); }
>> } else {
>> gen_ldo_env_A0[s->mem_index >> 2](op2_offset);
>> }
>>
>> cvttps2dq is 0x5b(b=0x5b) with repn prefix (b1=2) the above code is
>> optimized a bit more than it should have been, as it loads only 4 bytes
>> into xmm_t0 instead of 16.
>
> Uh, fine, but I don't understand how/what to fix. Can you advise?
Following will fix the _specific_ case of cvttps2dq, ideally one
should go through all the [0x50..0x5f, 0xc2] with (repnz,repz prefix)
range and check whether the rules imposed by the above snippet apply.
--- /mnt/big/npf/cvs/qemux/qemu/target-i386/translate.c Tue Jun 20 15:19:20 2006
+++ translate.c Tue Jun 20 18:17:19 2006
@@ -3009,7 +3009,9 @@
if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
b == 0xc2)) {
/* specific case for SSE single instructions */
- if (b1 == 2) {
+ if (b1 == 2 && b == 0x5b) {
+ gen_ldo_env_A0[s->mem_index >> 2](offsetof(CPUX86State,xmm_t0.XMM_Q(0)));
+ } else if (b1 == 2) {
/* 32 bit access */
gen_op_ld_T0_A0[OT_LONG + s->mem_index]();
gen_op_movl_env_T0(offsetof(CPUX86State,xmm_t0.XMM_L(0)));
--
mailto:malc@pulsesoft.com
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-21 8:21 ` malc
@ 2006-06-21 11:04 ` malc
2006-06-21 23:01 ` Julian Seward
0 siblings, 1 reply; 15+ messages in thread
From: malc @ 2006-06-21 11:04 UTC (permalink / raw)
To: qemu-devel
[-- Attachment #1: Type: TEXT/PLAIN, Size: 1056 bytes --]
On Wed, 21 Jun 2006, malc wrote:
> On Wed, 21 Jun 2006, Julian Seward wrote:
>
>>
>> Malc, your sse-movq.patch works for me. Thanks.
>>
>>> soft-float was a red herring, translate.c is at fault here (interpreter
>>> does not use it, hence behaved correctly)
[..snip..]
>>>
>>> cvttps2dq is 0x5b(b=0x5b) with repn prefix (b1=2) the above code is
>>> optimized a bit more than it should have been, as it loads only 4 bytes
>>> into xmm_t0 instead of 16.
>>
>> Uh, fine, but I don't understand how/what to fix. Can you advise?
>
> Following will fix the _specific_ case of cvttps2dq, ideally one
> should go through all the [0x50..0x5f, 0xc2] with (repnz,repz prefix)
> range and check wether the rules imposed by the above snippet apply.
[..snip..]
>
It appears that cvttps2dq is indeed the only exception in the range,
combined patch that fixes both movd?q2d?q and cvttps2dq is attached.
I don't have any kind of SSE on this machine so would appreciate if
someone would run tests/test-i386 with the patch attached.
--
mailto:malc@pulsesoft.com
[-- Attachment #2: Type: TEXT/PLAIN, Size: 1994 bytes --]
Index: target-i386/translate.c
===================================================================
RCS file: /cvsroot/qemu/qemu/target-i386/translate.c,v
retrieving revision 1.57
diff -u -u -r1.57 translate.c
--- target-i386/translate.c 14 Jun 2006 14:29:34 -0000 1.57
+++ target-i386/translate.c 21 Jun 2006 11:01:47 -0000
@@ -2947,15 +2947,15 @@
case 0x2d6: /* movq2dq */
gen_op_enter_mmx();
rm = (modrm & 7) | REX_B(s);
- gen_op_movq(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)),
- offsetof(CPUX86State,fpregs[reg & 7].mmx));
- gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[rm].XMM_Q(1)));
+ gen_op_movq(offsetof(CPUX86State,xmm_regs[reg & 7].XMM_Q(0)),
+ offsetof(CPUX86State,fpregs[rm].mmx));
+ gen_op_movq_env_0(offsetof(CPUX86State,xmm_regs[reg & 7].XMM_Q(1)));
break;
case 0x3d6: /* movdq2q */
gen_op_enter_mmx();
rm = (modrm & 7);
- gen_op_movq(offsetof(CPUX86State,fpregs[rm].mmx),
- offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
+ gen_op_movq(offsetof(CPUX86State,fpregs[reg].mmx),
+ offsetof(CPUX86State,xmm_regs[rm].XMM_Q(0)));
break;
case 0xd7: /* pmovmskb */
case 0x1d7:
@@ -3006,8 +3006,9 @@
if (mod != 3) {
gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
op2_offset = offsetof(CPUX86State,xmm_t0);
- if (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
- b == 0xc2)) {
+ if (!(b1 == 2 && b == 0x5b) &&
+ (b1 >= 2 && ((b >= 0x50 && b <= 0x5f) ||
+ b == 0xc2))) {
/* specific case for SSE single instructions */
if (b1 == 2) {
/* 32 bit access */
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-21 11:04 ` malc
@ 2006-06-21 23:01 ` Julian Seward
0 siblings, 0 replies; 15+ messages in thread
From: Julian Seward @ 2006-06-21 23:01 UTC (permalink / raw)
To: qemu-devel
> It appears that cvttps2dq is indeed the only exception in the range,
> combined patch that fixes both movd?q2d?q and cvttps2dq is attached.
>
> I don't have any kind of SSE on this machine so would apprecaite if
> someone would run tests/test-i386 with the patch attached.
That works for me. Thanks. Valgrind's integer/x87/MMX/SSE/SSE2 tests
now all pass on i386-softmmu. I didn't try tests/test-i386 though.
Fabrice, can you commit this?
J
^ permalink raw reply [flat|nested] 15+ messages in thread
* RE : [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-20 10:54 [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour Julian Seward
2006-06-20 11:29 ` malc
@ 2006-06-20 13:15 ` Sylvain Petreolle
2006-06-20 13:51 ` malc
1 sibling, 1 reply; 15+ messages in thread
From: Sylvain Petreolle @ 2006-06-20 13:15 UTC (permalink / raw)
To: qemu-devel
--- Julian Seward <jseward@acm.org> a écrit :
>
> The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
> correctly, as shown by the attached program. It should print
>
> cvttps2dq_1 ... ok
> cvttps2dq_2 ... ok
> movdq2q_1 ... ok
> movq2dq_1 ... ok
>
>
I tried your program on my linux station :
CPU: AMD Athlon(tm) XP 1600+ stepping 02
[syl@wine qemu]$ gcc --version
gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
[syl@wine qemu]$ gcc -msse2 sse2test.c -o sse2test
[syl@wine qemu]$ ./sse2test
cvttps2dq_1 ... failed
cvttps2dq_2 ... failed
movdq2q_1 ... failed
movq2dq_1 ... failed
what am i doing wrong here ?
Kind regards,
Sylvain Petreolle (aka Usurp)
--- --- --- --- --- --- --- --- --- --- --- --- ---
Windows is proprietary
Run your favorite apps with free ReactOS : http://www.reactos.org
Listen to free Music: http://www.jamendo.com
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: RE : [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-20 13:15 ` RE : " Sylvain Petreolle
@ 2006-06-20 13:51 ` malc
2006-06-20 14:13 ` Julian Seward
2006-06-20 14:17 ` RE : " Jens Axboe
0 siblings, 2 replies; 15+ messages in thread
From: malc @ 2006-06-20 13:51 UTC (permalink / raw)
To: spetreolle, qemu-devel
On Tue, 20 Jun 2006, Sylvain Petreolle wrote:
> --- Julian Seward <jseward@acm.org> a écrit :
>>
>> The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
>> correctly, as shown by the attached program. It should print
>>
>> cvttps2dq_1 ... ok
>> cvttps2dq_2 ... ok
>> movdq2q_1 ... ok
>> movq2dq_1 ... ok
>>
>>
>
> I tried your program on my linux station :
> CPU: AMD Athlon(tm) XP 1600+ stepping 02
>
> [syl@wine qemu]$ gcc --version
> gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
>
> [syl@wine qemu]$ gcc -msse2 sse2test.c -o sse2test
> [syl@wine qemu]$ ./sse2test
> cvttps2dq_1 ... failed
> cvttps2dq_2 ... failed
> movdq2q_1 ... failed
> movq2dq_1 ... failed
>
> what am i doing wrong here ?
Running it on a CPU without SSE2, if i'm allowed to venture a guess.
--
mailto:malc@pulsesoft.com
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-20 13:51 ` malc
@ 2006-06-20 14:13 ` Julian Seward
2006-06-20 15:06 ` RE : " Sylvain Petreolle
2006-06-20 14:17 ` RE : " Jens Axboe
1 sibling, 1 reply; 15+ messages in thread
From: Julian Seward @ 2006-06-20 14:13 UTC (permalink / raw)
To: qemu-devel; +Cc: spetreolle
> > [syl@wine qemu]$ gcc -msse2 sse2test.c -o sse2test
> > [syl@wine qemu]$ ./sse2test
> > cvttps2dq_1 ... failed
> > cvttps2dq_2 ... failed
> > movdq2q_1 ... failed
> > movq2dq_1 ... failed
> >
> > what am i doing wrong here ?
>
> Running it on a CPU without SSE2, if i'm allowed to venture a gues.
Yup. Try 'strace ./sse2test' and see if it gets SIGILLs thrown at it.
J
^ permalink raw reply [flat|nested] 15+ messages in thread
* RE : Re: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-20 14:13 ` Julian Seward
@ 2006-06-20 15:06 ` Sylvain Petreolle
2006-06-20 15:14 ` Guillaume POIRIER
0 siblings, 1 reply; 15+ messages in thread
From: Sylvain Petreolle @ 2006-06-20 15:06 UTC (permalink / raw)
To: qemu-devel
--- Julian Seward <jseward@acm.org> a écrit :
> >
> > Running it on a CPU without SSE2, if i'm allowed to venture a gues.
>
> Yup. Try 'strace ./sse2test' and see if it gets SIGILLs thrown at it.
>
> J
>
You are right, I get SIGILLs.
Seems I was wrong thinking Athlon xp was able to support sse2.
--- SIGILL (Illegal instruction) @ 0 (0) ---
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
fstat64(1, {st_mode=S_IFREG|0644, st_size=1613, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb7f3e000
rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
--- SIGILL (Illegal instruction) @ 0 (0) ---
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
--- SIGILL (Illegal instruction) @ 0 (0) ---
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, NULL, [], 8) = 0
--- SIGILL (Illegal instruction) @ 0 (0) ---
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
write(1, "cvttps2dq_1 ... failed\ncvttps2dq"..., 88cvttps2dq_1 ... failed
Kind regards,
Sylvain Petreolle (aka Usurp)
--- --- --- --- --- --- --- --- --- --- --- --- ---
Run your favorite Windows apps with free ReactOS : http://www.reactos.org
Listen to non-DRMised Music: http://www.jamendo.com
Linux is not as well stable as it is told to. The proof is, mine has restarted two years ago, on the occasion of a power cut.
- H. Eychenne
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: RE : [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-20 13:51 ` malc
2006-06-20 14:13 ` Julian Seward
@ 2006-06-20 14:17 ` Jens Axboe
2006-06-20 14:23 ` Jens Axboe
1 sibling, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2006-06-20 14:17 UTC (permalink / raw)
To: qemu-devel; +Cc: spetreolle
On Tue, Jun 20 2006, malc wrote:
> On Tue, 20 Jun 2006, Sylvain Petreolle wrote:
>
> >--- Julian Seward <jseward@acm.org> a écrit :
> >>
> >>The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
> >>correctly, as shown by the attached program. It should print
> >>
> >> cvttps2dq_1 ... ok
> >> cvttps2dq_2 ... ok
> >> movdq2q_1 ... ok
> >> movq2dq_1 ... ok
> >>
> >>
> >
> >I tried your program on my linux station :
> >CPU: AMD Athlon(tm) XP 1600+ stepping 02
> >
> >[syl@wine qemu]$ gcc --version
> >gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
> >
> >[syl@wine qemu]$ gcc -msse2 sse2test.c -o sse2test
> >[syl@wine qemu]$ ./sse2test
> >cvttps2dq_1 ... failed
> >cvttps2dq_2 ... failed
> >movdq2q_1 ... failed
> >movq2dq_1 ... failed
> >
> >what am i doing wrong here ?
>
> Running it on a CPU without SSE2, if i'm allowed to venture a gues.
Doesn't work for me, either:
axboe@nelson:/home/axboe $ ./a
cvttps2dq_1 ... not ok
result0.sd[0] = 0 (expected 12)
result0.sd[1] = 0 (expected 56)
result0.sd[2] = 0 (expected 43)
result0.sd[3] = 0 (expected 87)
cvttps2dq_2 ... not ok
result0.sd[0] = 0 (expected 12)
result0.sd[1] = 0 (expected 56)
result0.sd[2] = 0 (expected 43)
result0.sd[3] = 0 (expected 87)
movdq2q_1 ... not ok
result0.uq[0] = 240518168588 (expected 5124095577148911)
movq2dq_1 ... not ok
result0.uq[0] = 0 (expected 5124095577148911)
result0.uq[1] = 0 (expected 0)
axboe@nelson:/home/axboe $ ./a
Segmentation fault
Varies between the two. Compiling without -O2 makes the last two
succeed, the others still not. This CPU has sse2.
--
Jens Axboe
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: RE : [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour
2006-06-20 14:17 ` RE : " Jens Axboe
@ 2006-06-20 14:23 ` Jens Axboe
0 siblings, 0 replies; 15+ messages in thread
From: Jens Axboe @ 2006-06-20 14:23 UTC (permalink / raw)
To: qemu-devel; +Cc: spetreolle
On Tue, Jun 20 2006, Jens Axboe wrote:
> On Tue, Jun 20 2006, malc wrote:
> > On Tue, 20 Jun 2006, Sylvain Petreolle wrote:
> >
> > >--- Julian Seward <jseward@acm.org> a écrit :
> > >>
> > >>The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave
> > >>correctly, as shown by the attached program. It should print
> > >>
> > >> cvttps2dq_1 ... ok
> > >> cvttps2dq_2 ... ok
> > >> movdq2q_1 ... ok
> > >> movq2dq_1 ... ok
> > >>
> > >>
> > >
> > >I tried your program on my linux station :
> > >CPU: AMD Athlon(tm) XP 1600+ stepping 02
> > >
> > >[syl@wine qemu]$ gcc --version
> > >gcc (GCC) 4.1.1 20060525 (Red Hat 4.1.1-1)
> > >
> > >[syl@wine qemu]$ gcc -msse2 sse2test.c -o sse2test
> > >[syl@wine qemu]$ ./sse2test
> > >cvttps2dq_1 ... failed
> > >cvttps2dq_2 ... failed
> > >movdq2q_1 ... failed
> > >movq2dq_1 ... failed
> > >
> > >what am i doing wrong here ?
> >
> > Running it on a CPU without SSE2, if i'm allowed to venture a gues.
>
> Doesn't work for me, either:
>
> axboe@nelson:/home/axboe $ ./a
> cvttps2dq_1 ... not ok
> result0.sd[0] = 0 (expected 12)
> result0.sd[1] = 0 (expected 56)
> result0.sd[2] = 0 (expected 43)
> result0.sd[3] = 0 (expected 87)
> cvttps2dq_2 ... not ok
> result0.sd[0] = 0 (expected 12)
> result0.sd[1] = 0 (expected 56)
> result0.sd[2] = 0 (expected 43)
> result0.sd[3] = 0 (expected 87)
> movdq2q_1 ... not ok
> result0.uq[0] = 240518168588 (expected 5124095577148911)
> movq2dq_1 ... not ok
> result0.uq[0] = 0 (expected 5124095577148911)
> result0.uq[1] = 0 (expected 0)
> axboe@nelson:/home/axboe $ ./a
> Segmentation fault
>
> Varies between the two. Compiling without -O2 makes the last two
> suceed, the others still not. This CPU has sse2.
32-bit version works, as intended I guess.
--
Jens Axboe
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2006-06-21 23:01 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-06-20 10:54 [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour Julian Seward
2006-06-20 11:29 ` malc
2006-06-20 11:48 ` Julian Seward
2006-06-20 14:26 ` malc
2006-06-21 0:31 ` Julian Seward
2006-06-21 8:21 ` malc
2006-06-21 11:04 ` malc
2006-06-21 23:01 ` Julian Seward
2006-06-20 13:15 ` RE : " Sylvain Petreolle
2006-06-20 13:51 ` malc
2006-06-20 14:13 ` Julian Seward
2006-06-20 15:06 ` RE : " Sylvain Petreolle
2006-06-20 15:14 ` Guillaume POIRIER
2006-06-20 14:17 ` RE : " Jens Axboe
2006-06-20 14:23 ` Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).