From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <linux-kernel-owner@vger.kernel.org>
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S1753364AbbHVIzA (ORCPT <rfc822;w@1wt.eu>);
	Sat, 22 Aug 2015 04:55:00 -0400
Received: from mail-wi0-f172.google.com ([209.85.212.172]:38053 "EHLO
	mail-wi0-f172.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1753149AbbHVIy5 (ORCPT
	<rfc822;linux-kernel@vger.kernel.org>);
	Sat, 22 Aug 2015 04:54:57 -0400
Date: Sat, 22 Aug 2015 10:54:53 +0200
From: Ingo Molnar <mingo@kernel.org>
To: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>, "H. Peter Anvin" <hpa@zytor.com>,
        Andy Lutomirski <luto@amacapital.net>,
        Kees Cook <keescook@chromium.org>, x86@kernel.org,
        linux-kernel@vger.kernel.org
Subject: Re: [PATCH] x86/math-emu: Add support for FCMOVcc and F[U]COMI[P]
 insns
Message-ID: <20150822085453.GB10490@gmail.com>
References: <1440152395-19818-1-git-send-email-dvlasenk@redhat.com>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
In-Reply-To: <1440152395-19818-1-git-send-email-dvlasenk@redhat.com>
User-Agent: Mutt/1.5.23 (2014-03-12)
Sender: linux-kernel-owner@vger.kernel.org
List-ID: <linux-kernel.vger.kernel.org>
X-Mailing-List: linux-kernel@vger.kernel.org


* Denys Vlasenko <dvlasenk@redhat.com> wrote:

> +/* fcmovCC and f(u)comi(p) are enabled if CPUID(1).EDX(15) "cmov" is set */
> +
>  static FUNC const st_instr_table[64] = {
> -	fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, _df_c0_,
> -	fmul__, fxch_i, __BAD__, __BAD__, fmul_i, _dd_c8_, fmulp_, _df_c8_,
> -	fcom_st, fp_nop, __BAD__, __BAD__, _dc_d0_, fst_i_, _de_d0_, _df_d0_,
> -	fcompst, _d9_d8_, __BAD__, __BAD__, _dc_d8_, fstp_i, fcompp, _df_d8_,
> +	fadd__,  fld_i_,  fcmovb,   fcmovnb,  fadd_i,  ffree_,  faddp_,  _df_c0_,
> +	fmul__,  fxch_i,  fcmove,   fcmovne,  fmul_i,  _dd_c8_, fmulp_,  _df_c8_,
> +	fcom_st, fp_nop,  fcmovbe,  fcmovnbe, _dc_d0_, fst_i_,  _de_d0_, _df_d0_,
> +	fcompst, _d9_d8_, fcmovu,   fcmovnu,  _dc_d8_, fstp_i,  fcompp,  _df_d8_,
>  	fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
> -	fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
> -	fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
> +	fsubr_, fconst,   fucompp,  fucomi_,  fsub_i,  fucomp,  fsubp_,  fucomip,
> +	fdiv__, FPU_triga, __BAD__, fcomi_,   fdivri,  __BAD__, fdivrp,  fcomip,
>  	fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
>  };

So the problem is that you did not give an FPU register encoding type table entry 
for the new opcodes:

static u_char const type_table[64] = {
	_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
	_REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
	_REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
	_REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
	_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
	_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
};

Those _null_ entries must be filled in as well.

For FUCOMI[P] it's _REGIc I think, so I tried that - and the patch below on top of 
yours made those instructions appear to work - only to be caught in an MMX op:

0xb75eb3fb <bn_mul_add_words+59>:       movd   %ebp,%mm0                                                                                                                              

:-/

Arguably the way I tested it, user-space libraries see SSE and MMX capabilities:

flags           : vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat 
pse36 clflush mmx sse2 ht syscall nx mmxext lm 3dnowext 3dnow rep_good pni lahf_lm 
cmp_legacy 3dnowprl...

So I'll first turn those CPUID bits off, to (hopefully) not confuse user-space.

Thanks,

	Ingo

====================>

 arch/x86/math-emu/fpu_entry.c | 82 ++++++++++++++-----------------------------
 1 file changed, 27 insertions(+), 55 deletions(-)

diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index d20c8f8420e2..4d91c0fc6bc3 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -40,12 +40,10 @@
 
 #define __BAD__ FPU_illegal	/* Illegal on an 80486, causes SIGILL */
 
-#ifndef NO_UNDOC_CODE		/* Un-documented FPU op-codes supported by default. */
-
-/* WARNING: These codes are not documented by Intel in their 80486 manual
+/* WARNING: These codes are not all documented by Intel in their 80486 manual
    and may not work on FPU clones or later Intel FPUs. */
 
-/* Changes to support the un-doc codes provided by Linus Torvalds. */
+/* Changes to support the un-documented instructions provided by Linus Torvalds. */
 
 #define _d9_d8_ fstp_i		/* unofficial code (19) */
 #define _dc_d0_ fcom_st		/* unofficial code (14) */
@@ -60,31 +58,24 @@
 /* fcmovCC and f(u)comi(p) are enabled if CPUID(1).EDX(15) "cmov" is set */
 
 static FUNC const st_instr_table[64] = {
-	fadd__,  fld_i_,  fcmovb,   fcmovnb,  fadd_i,  ffree_,  faddp_,  _df_c0_,
-	fmul__,  fxch_i,  fcmove,   fcmovne,  fmul_i,  _dd_c8_, fmulp_,  _df_c8_,
-	fcom_st, fp_nop,  fcmovbe,  fcmovnbe, _dc_d0_, fst_i_,  _de_d0_, _df_d0_,
-	fcompst, _d9_d8_, fcmovu,   fcmovnu,  _dc_d8_, fstp_i,  fcompp,  _df_d8_,
-	fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
-	fsubr_, fconst,   fucompp,  fucomi_,  fsub_i,  fucomp,  fsubp_,  fucomip,
-	fdiv__, FPU_triga, __BAD__, fcomi_,   fdivri,  __BAD__, fdivrp,  fcomip,
-	fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
+/* 0x00 */	fadd__,		fld_i_,		fcmovb,		fcmovnb,
+/* 0x04 */	fadd_i,		ffree_,		faddp_,		_df_c0_,
+/* 0x08 */	fmul__,		fxch_i,		fcmove,		fcmovne,
+/* 0x0c */	fmul_i,		_dd_c8_,	fmulp_,		_df_c8_,
+/* 0x10 */	fcom_st,	fp_nop,		fcmovbe,	fcmovnbe,
+/* 0x14 */	_dc_d0_,	fst_i_,		_de_d0_,	_df_d0_,
+/* 0x18 */	fcompst,	_d9_d8_,	fcmovu,		fcmovnu,
+/* 0x1c */	_dc_d8_,	fstp_i,		fcompp,		_df_d8_,
+/* 0x20 */	fsub__,		FPU_etc,	__BAD__,	finit_,
+/* 0x24 */	fsubri,		fucom_,		fsubrp,		fstsw_,
+/* 0x28 */	fsubr_,		fconst,		fucompp,	fucomi_,
+/* 0x2c */	fsub_i,		fucomp,		fsubp_,		fucomip,
+/* 0x30 */	fdiv__,		FPU_triga,	__BAD__,	fcomi_,
+/* 0x34 */	fdivri,		__BAD__,	fdivrp,		fcomip,
+/* 0x38 */	fdivr_,		FPU_trigb,	__BAD__,	__BAD__,
+/* 0x3c */	fdiv_i,		__BAD__,	fdivp_,		__BAD__,
 };
 
-#else /* Support only documented FPU op-codes */
-
-static FUNC const st_instr_table[64] = {
-	fadd__, fld_i_, __BAD__, __BAD__, fadd_i, ffree_, faddp_, __BAD__,
-	fmul__, fxch_i, __BAD__, __BAD__, fmul_i, __BAD__, fmulp_, __BAD__,
-	fcom_st, fp_nop, __BAD__, __BAD__, __BAD__, fst_i_, __BAD__, __BAD__,
-	fcompst, __BAD__, __BAD__, __BAD__, __BAD__, fstp_i, fcompp, __BAD__,
-	fsub__, FPU_etc, __BAD__, finit_, fsubri, fucom_, fsubrp, fstsw_,
-	fsubr_, fconst, fucompp, __BAD__, fsub_i, fucomp, fsubp_, __BAD__,
-	fdiv__, FPU_triga, __BAD__, __BAD__, fdivri, __BAD__, fdivrp, __BAD__,
-	fdivr_, FPU_trigb, __BAD__, __BAD__, fdiv_i, __BAD__, fdivp_, __BAD__,
-};
-
-#endif /* NO_UNDOC_CODE */
-
 #define _NONE_ 0		/* Take no special action */
 #define _REG0_ 1		/* Need to check for not empty st(0) */
 #define _REGI_ 2		/* Need to check for not empty st(0) and st(rm) */
@@ -96,36 +87,17 @@ static FUNC const st_instr_table[64] = {
 #define _REGIc 0		/* Compare st(0) and st(rm) */
 #define _REGIn 0		/* Uses st(0) and st(rm), but handle checks later */
 
-#ifndef NO_UNDOC_CODE
-
-/* Un-documented FPU op-codes supported by default. (see above) */
-
-static u_char const type_table[64] = {
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
-	_REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
-	_REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
-	_REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
-	_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
-	_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
-};
-
-#else /* Support only documented FPU op-codes */
-
 static u_char const type_table[64] = {
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
-	_REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-	_REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
-	_REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
-	_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
-	_REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
+/* 0x00 */	_REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _REGi_,
+/* 0x08 */	_REGI_, _REGIn, _null_, _null_, _REGIi, _REGI_, _REGIp, _REGI_,
+/* 0x10 */	_REGIc, _NONE_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
+/* 0x18 */	_REGIc, _REG0_, _null_, _null_, _REGIc, _REG0_, _REGIc, _REG0_,
+/* 0x20 */	_REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
+/* 0x28 */	_REGI_, _NONE_, _REGIc, _REGIc, _REGIi, _REGIc, _REGIp, _REGIc,
+/* 0x30 */	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
+/* 0x38 */	_REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
 };
 
-#endif /* NO_UNDOC_CODE */
-
 #ifdef RE_ENTRANT_CHECKING
 u_char emulating = 0;
 #endif /* RE_ENTRANT_CHECKING */