From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1Fsdsh-00070l-7Y for qemu-devel@nongnu.org; Tue, 20 Jun 2006 06:54:51 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1Fsdsg-0006zc-9y for qemu-devel@nongnu.org; Tue, 20 Jun 2006 06:54:50 -0400 Received: from [199.232.76.173] (helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Fsdsg-0006zY-4b for qemu-devel@nongnu.org; Tue, 20 Jun 2006 06:54:50 -0400 Received: from [81.103.221.48] (helo=mtaout02-winn.ispmail.ntl.com) by monty-python.gnu.org with esmtp (Exim 4.52) id 1Fse3B-0005jm-6z for qemu-devel@nongnu.org; Tue, 20 Jun 2006 07:05:41 -0400 Received: from aamtaout04-winn.ispmail.ntl.com ([81.103.221.35]) by mtaout02-winn.ispmail.ntl.com with ESMTP id <20060620105447.JCHJ27023.mtaout02-winn.ispmail.ntl.com@aamtaout04-winn.ispmail.ntl.com> for ; Tue, 20 Jun 2006 11:54:47 +0100 Received: from suse10.valgrind.org ([82.21.96.252]) by aamtaout04-winn.ispmail.ntl.com with ESMTP id <20060620105447.XQAO16086.aamtaout04-winn.ispmail.ntl.com@suse10.valgrind.org> for ; Tue, 20 Jun 2006 11:54:47 +0100 From: Julian Seward Date: Tue, 20 Jun 2006 11:54:40 +0100 MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Content-Disposition: inline Message-Id: <200606201154.40985.jseward@acm.org> Subject: [Qemu-devel] cvttps2dq, movdq2q, movq2dq incorrect behaviour Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org The SSE2 instructions cvttps2dq, movdq2q, movq2dq do not behave correctly, as shown by the attached program. It should print cvttps2dq_1 ... ok cvttps2dq_2 ... ok movdq2q_1 ... ok movq2dq_1 ... ok but instead produces cvttps2dq_1 ... ok cvttps2dq_2 ... not ok result0.sd[0] = 12 (expected 12) result0.sd[1] = 3 (expected 56) result0.sd[2] = -2147483648 (expected 43) result0.sd[3] = 3 (expected 87) movdq2q_1 ... not ok result0.uq[0] = 1302123111658042420 (expected 5124095577148911) movq2dq_1 ... not ok result0.uq[0] = 1302123111658042420 (expected 5124095577148911) result0.uq[1] = 6221254864647256184 (expected 0) I looked at QEMU's instruction decoders for these, and compared them to Valgrind's, but could not see what the problem was. The decode logic looks OK. Maybe the problem is elsewhere. J ------------------------------------------------------------------- #include #include #include #include #include typedef union { char sb[1]; unsigned char ub[1]; } reg8_t; typedef union { char sb[2]; unsigned char ub[2]; short sw[1]; unsigned short uw[1]; } reg16_t; typedef union { char sb[4]; unsigned char ub[4]; short sw[2]; unsigned short uw[2]; long int sd[1]; unsigned long int ud[1]; float ps[1]; } reg32_t; typedef union { char sb[8]; unsigned char ub[8]; short sw[4]; unsigned short uw[4]; long int sd[2]; unsigned long int ud[2]; long long int sq[1]; unsigned long long int uq[1]; float ps[2]; double pd[1]; } reg64_t __attribute__ ((aligned (8))); typedef union { char sb[16]; unsigned char ub[16]; short sw[8]; unsigned short uw[8]; long int sd[4]; unsigned long int ud[4]; long long int sq[2]; unsigned long long int uq[2]; float ps[4]; double pd[2]; } reg128_t __attribute__ ((aligned (16))); static sigjmp_buf catchpoint; static void handle_sigill(int signum) { siglongjmp(catchpoint, 1); } __attribute__((unused)) static int eq_float(float f1, float f2) { return f1 == f2 || fabsf(f1 - f2) < fabsf(f1) * 1.5 * pow(2,-12); } __attribute__((unused)) static int eq_double(double d1, double d2) { return d1 == d2 || fabs(d1 - d2) < fabs(d1) * 1.5 * pow(2,-12); } static void cvttps2dq_1(void) { reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } }; reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } }; reg128_t result0; char state[108]; if (sigsetjmp(catchpoint, 1) == 0) { asm( "fsave %3\n" "movlps 0%0, %%xmm4\n" "movhps 8%0, %%xmm4\n" "movlps 0%1, %%xmm5\n" "movhps 8%1, %%xmm5\n" "cvttps2dq %%xmm4, %%xmm5\n" "movlps %%xmm5, 0%2\n" "movhps %%xmm5, 8%2\n" "frstor %3\n" : : "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0]) : "xmm4", "xmm5" ); if (result0.sd[0] == 12L && result0.sd[1] == 56L && result0.sd[2] == 43L && result0.sd[3] == 87L ) { printf("cvttps2dq_1 ... ok\n"); } else { printf("cvttps2dq_1 ... not ok\n"); printf(" result0.sd[0] = %ld (expected %ld)\n", result0.sd[0], 12L); printf(" result0.sd[1] = %ld (expected %ld)\n", result0.sd[1], 56L); printf(" result0.sd[2] = %ld (expected %ld)\n", result0.sd[2], 43L); printf(" result0.sd[3] = %ld (expected %ld)\n", result0.sd[3], 87L); } } else { printf("cvttps2dq_1 ... failed\n"); } return; } static void cvttps2dq_2(void) { reg128_t arg0 = { .ps = { 12.34F, 56.78F, 43.21F, 87.65F } }; reg128_t arg1 = { .sd = { 1L, 2L, 3L, 4L } }; reg128_t result0; char state[108]; if (sigsetjmp(catchpoint, 1) == 0) { asm( "fsave %3\n" "movlps 0%1, %%xmm5\n" "movhps 8%1, %%xmm5\n" "cvttps2dq %0, %%xmm5\n" "movlps %%xmm5, 0%2\n" "movhps %%xmm5, 8%2\n" "frstor %3\n" : : "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0]) : "xmm4", "xmm5" ); if (result0.sd[0] == 12L && result0.sd[1] == 56L && result0.sd[2] == 43L && result0.sd[3] == 87L ) { printf("cvttps2dq_2 ... ok\n"); } else { printf("cvttps2dq_2 ... not ok\n"); printf(" result0.sd[0] = %ld (expected %ld)\n", result0.sd[0], 12L); printf(" result0.sd[1] = %ld (expected %ld)\n", result0.sd[1], 56L); printf(" result0.sd[2] = %ld (expected %ld)\n", result0.sd[2], 43L); printf(" result0.sd[3] = %ld (expected %ld)\n", result0.sd[3], 87L); } } else { printf("cvttps2dq_2 ... failed\n"); } return; } static void movdq2q_1(void) { reg128_t arg0 = { .uq = { 0x012345678abcdefULL, 0xfedcba9876543210ULL } }; reg64_t arg1 = { .uq = { 0x1212121234343434ULL } }; reg64_t result0; char state[108]; if (sigsetjmp(catchpoint, 1) == 0) { asm( "fsave %3\n" "movlps 0%0, %%xmm4\n" "movhps 8%0, %%xmm4\n" "movq %1, %%mm6\n" "movdq2q %%xmm4, %%mm6\n" "movq %%mm6, %2\n" "frstor %3\n" : : "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0]) : "xmm4", "mm6" ); if (result0.uq[0] == 0x012345678abcdefULL ) { printf("movdq2q_1 ... ok\n"); } else { printf("movdq2q_1 ... not ok\n"); printf(" result0.uq[0] = %llu (expected %llu)\n", result0.uq[0], 0x012345678abcdefULL); } } else { printf("movdq2q_1 ... failed\n"); } return; } static void movq2dq_1(void) { reg64_t arg0 = { .uq = { 0x012345678abcdefULL } }; reg128_t arg1 = { .uq = { 0x1212121234343434ULL, 0x5656565678787878ULL } }; reg128_t result0; char state[108]; if (sigsetjmp(catchpoint, 1) == 0) { asm( "fsave %3\n" "movq %0, %%mm6\n" "movlps 0%1, %%xmm4\n" "movhps 8%1, %%xmm4\n" "movq2dq %%mm6, %%xmm4\n" "movlps %%xmm4, 0%2\n" "movhps %%xmm4, 8%2\n" "frstor %3\n" : : "m" (arg0), "m" (arg1), "m" (result0), "m" (state[0]) : "mm6", "xmm4" ); if (result0.uq[0] == 0x012345678abcdefULL && result0.uq[1] == 0ULL ) { printf("movq2dq_1 ... ok\n"); } else { printf("movq2dq_1 ... not ok\n"); printf(" result0.uq[0] = %llu (expected %llu)\n", result0.uq[0], 0x012345678abcdefULL); printf(" result0.uq[1] = %llu (expected %llu)\n", result0.uq[1], 0ULL); } } else { printf("movq2dq_1 ... failed\n"); } return; } int main(int argc, char **argv) { signal(SIGILL, handle_sigill); cvttps2dq_1(); cvttps2dq_2(); movdq2q_1(); movq2dq_1(); exit(0); }