qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Jaume Martí" <jaume.martif@gmail.com>
To: Richard Henderson <rth@twiddle.net>
Cc: Peter Maydell <peter.maydell@linaro.org>,
	mtosatti@redhat.com, gleb@redhat.com, mst <mst@redhat.com>,
	"riku.voipio" <riku.voipio@iki.fi>,
	qemu-devel@nongnu.org, quintela@redhat.com, vrozenfe@redhat.com,
	anthony <anthony@codemonkey.ws>,
	pbonzini@redhat.com, "alex.bennee" <alex.bennee@linaro.org>,
	afaerber@suse.de
Subject: Re: [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.
Date: Mon, 21 Jul 2014 20:55:48 +0200	[thread overview]
Message-ID: <CAL4g94QH0psTw2bEk37L7H25d0bVFStn=f3brQkgRMVBbk4r+g@mail.gmail.com> (raw)
In-Reply-To: <CAL4g94RnG7k6Mp-Q17zcm_TrTbN+Kg4TT4tODSW-0FOuVK0XhA@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 176775 bytes --]

Hello,

The patch in my previous email was corrupted by Gmail's limit of
78 characters per line when sending plain-text emails.
I have attached a new patch. You can also pull the code from
https://github.com/jmartif/qemu.git
Please review and apply.

Best regards,
Jaume

Signed-off-by: Jaume Marti Farriol <jaume.martif@gmail.com>
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 1141054..73f8f6b 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -865,7 +865,7 @@ static void setup_sigcontext(struct target_sigcontext
*sc,
     __put_user(env->regs[R_ESP], &sc->esp_at_signal);
     __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);

-        cpu_x86_fsave(env, fpstate_addr, 1);
+        cpu_x86_fsave(env, fpstate_addr);
         fpstate->status = fpstate->sw;
         magic = 0xffff;
     __put_user(magic, &fpstate->magic);
@@ -1068,7 +1068,7 @@ restore_sigcontext(CPUX86State *env, struct
target_sigcontext *sc, int *peax)
                 if (!access_ok(VERIFY_READ, fpstate_addr,
                                sizeof(struct target_fpstate)))
                         goto badframe;
-                cpu_x86_frstor(env, fpstate_addr, 1);
+                cpu_x86_frstor(env, fpstate_addr);
  }

         *peax = tswapl(sc->eax);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index e634d83..4274ce3 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -819,10 +819,11 @@ typedef struct CPUX86State {
     uint16_t fpuc;
     uint8_t fptags[8];   /* 0 = valid, 1 = empty */
     FPReg fpregs[8];
-    /* KVM-only so far */
-    uint16_t fpop;
+    uint32_t fpop;
     uint64_t fpip;
     uint64_t fpdp;
+    uint32_t fpcs;
+    uint32_t fpds;

     /* emulator internal variables */
     float_status fp_status;
@@ -1067,8 +1068,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
 /* the following helpers are only usable in user mode simulation as
    they can trigger unexpected exceptions */
 void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
-void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
-void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);

 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index 1b2900d..6886031 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -56,6 +56,8 @@
 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)

+#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
+
 static inline void fpush(CPUX86State *env)
 {
     env->fpstt = (env->fpstt - 1) & 7;
@@ -604,6 +606,10 @@ void helper_fninit(CPUX86State *env)
     env->fptags[5] = 1;
     env->fptags[6] = 1;
     env->fptags[7] = 1;
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

 /* BCD ops */
@@ -961,13 +967,13 @@ void helper_fxam_ST0(CPUX86State *env)
     }
 }

-void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int fpus, fptag, exp, i;
+    int fptag, exp, i;
     uint64_t mant;
     CPU_LDoubleU tmp;

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 7; i >= 0; i--) {
         fptag <<= 2;
@@ -987,83 +993,150 @@ void helper_fstenv(CPUX86State *env, target_ulong
ptr, int data32)
             }
         }
     }
+
     if (data32) {
         /* 32 bit */
-        cpu_stl_data(env, ptr, env->fpuc);
-        cpu_stl_data(env, ptr + 4, fpus);
-        cpu_stl_data(env, ptr + 8, fptag);
-        cpu_stl_data(env, ptr + 12, 0); /* fpip */
-        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
-        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
-        cpu_stl_data(env, ptr + 24, 0); /* fpos */
+        cpu_stw_data(env, ptr, env->fpuc);
+        cpu_stw_data(env, ptr + 4, FPUS(env));
+        cpu_stw_data(env, ptr + 8, fptag);
+        if (protected_mode) {
+            cpu_stl_data(env, ptr + 12, env->fpip);
+            cpu_stl_data(env, ptr + 16,
+                        ((env->fpop & 0x7ff) << 16) | (env->fpcs &
0xffff));
+            cpu_stl_data(env, ptr + 20, env->fpdp);
+            cpu_stl_data(env, ptr + 24, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
+            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) <<
12) |
+                        (env->fpop & 0x7ff))); /* fpip[31..16], fpop */
+            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
+            cpu_stl_data(env, ptr + 24,
+                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
+        }
     } else {
         /* 16 bit */
         cpu_stw_data(env, ptr, env->fpuc);
-        cpu_stw_data(env, ptr + 2, fpus);
+        cpu_stw_data(env, ptr + 2, FPUS(env));
         cpu_stw_data(env, ptr + 4, fptag);
-        cpu_stw_data(env, ptr + 6, 0);
-        cpu_stw_data(env, ptr + 8, 0);
-        cpu_stw_data(env, ptr + 10, 0);
-        cpu_stw_data(env, ptr + 12, 0);
+        if (protected_mode) {
+            cpu_stw_data(env, ptr + 6, env->fpip);
+            cpu_stw_data(env, ptr + 8, env->fpcs);
+            cpu_stw_data(env, ptr + 10, env->fpdp);
+            cpu_stw_data(env, ptr + 12, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
+            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
+                        (env->fpop & 0x7ff)); /* fpip[19..16], fpop */
+            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
+            cpu_stw_data(env, ptr + 12,
+                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
+        }
     }
+
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

-void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int i, fpus, fptag;
+    int tmp, i, fpus, fptag;

     if (data32) {
+        /* 32 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 4);
         fptag = cpu_lduw_data(env, ptr + 8);
+        if (protected_mode) {
+            env->fpip = cpu_ldl_data(env, ptr + 12);
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpcs = tmp & 0xffff;
+            env->fpop = tmp >> 16;
+            env->fpdp = cpu_ldl_data(env, ptr + 20);
+            env->fpds = cpu_lduw_data(env, ptr + 24);
+        } else {
+            /* Real mode */
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpip = ((tmp & 0xffff000) << 4) |
+                        cpu_lduw_data(env, ptr + 12);
+            env->fpop = tmp & 0x7ff;
+            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
+                        cpu_lduw_data(env, ptr + 20);
+        }
     } else {
+        /* 16 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 2);
         fptag = cpu_lduw_data(env, ptr + 4);
+        if (protected_mode) {
+            /* Protected mode  */
+            env->fpip = cpu_lduw_data(env, ptr + 6);
+            env->fpcs = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 10);
+            env->fpds = cpu_lduw_data(env, ptr + 12);
+        } else {
+            /* Real mode  */
+            tmp = cpu_lduw_data(env, ptr + 8);
+            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr +
6);
+            env->fpop = tmp & 0x7ff;
+            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
+                        cpu_lduw_data(env, ptr + 10);
+        }
     }
+
     env->fpstt = (fpus >> 11) & 7;
     env->fpus = fpus & ~0x3800;
     for (i = 0; i < 8; i++) {
         env->fptags[i] = ((fptag & 3) == 3);
         fptag >>= 2;
     }
+
+    env->fpip &= 0xffffffff;
+    env->fpdp &= 0xffffffff;
+    if (!protected_mode) {
+        env->fpcs = 0;
+        env->fpds = 0;
+    }
 }

-void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
+                  int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fstenv(env, ptr, data32);
+    helper_fstenv(env, ptr, data32, protected_mode);

-    ptr += (14 << data32);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }
     for (i = 0; i < 8; i++) {
         tmp = ST(i);
         helper_fstt(env, tmp, ptr);
         ptr += 10;
     }

-    /* fninit */
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
+    helper_fninit(env);
 }

-void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fldenv(env, ptr, data32);
-    ptr += (14 << data32);
+    helper_fldenv(env, ptr, data32, protected_mode);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }

     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, ptr);
@@ -1072,21 +1145,22 @@ void helper_frstor(CPUX86State *env, target_ulong
ptr, int data32)
     }
 }

-#if defined(CONFIG_USER_ONLY)
-void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
+#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS
== 32
+
+void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
 {
-    helper_fsave(env, ptr, data32);
+    helper_fsave(env, ptr, 1, 1);
 }

-void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
 {
-    helper_frstor(env, ptr, data32);
+    helper_frstor(env, ptr, 1, 1);
 }
 #endif

-void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int
data64)
 {
-    int fpus, fptag, i, nb_xmm_regs;
+    int i, nb_xmm_regs, fptag;
     floatx80 tmp;
     target_ulong addr;

@@ -1095,25 +1169,36 @@ void helper_fxsave(CPUX86State *env, target_ulong
ptr, int data64)
         raise_exception(env, EXCP0D_GPF);
     }

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 0; i < 8; i++) {
         fptag |= (env->fptags[i] << i);
     }
+    fptag ^= 0xff;
+
     cpu_stw_data(env, ptr, env->fpuc);
-    cpu_stw_data(env, ptr + 2, fpus);
-    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
+    cpu_stw_data(env, ptr + 2, FPUS(env));
+    cpu_stw_data(env, ptr + 4, fptag & 0xff);
+    cpu_stw_data(env, ptr + 6, env->fpop);
+
 #ifdef TARGET_X86_64
     if (data64) {
-        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
-        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
+        /* 64 bit */
+        cpu_stq_data(env, ptr + 8, env->fpip);
+        cpu_stq_data(env, ptr + 16, env->fpdp);
     } else
 #endif
     {
-        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
-        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
-        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
-        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
+        if (data32) {
+            /* 32 bit */
+            cpu_stl_data(env, ptr + 8, env->fpip);
+            cpu_stl_data(env, ptr + 16, env->fpdp);
+        } else {
+            /* 16 bit */
+            cpu_stw_data(env, ptr + 8, env->fpip);
+            cpu_stw_data(env, ptr + 16, env->fpdp);
+        }
+        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
+        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
     }

     addr = ptr + 0x20;
@@ -1146,7 +1231,7 @@ void helper_fxsave(CPUX86State *env, target_ulong
ptr, int data64)
     }
 }

-void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int
data64)
 {
     int i, fpus, fptag, nb_xmm_regs;
     floatx80 tmp;
@@ -1167,6 +1252,30 @@ void helper_fxrstor(CPUX86State *env, target_ulong
ptr, int data64)
         env->fptags[i] = ((fptag >> i) & 1);
     }

+    env->fpop = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
+
+#ifdef TARGET_X86_64
+    if (data64) {
+        /* 64 bit */
+        env->fpip = cpu_ldq_data(env, ptr + 8);
+        env->fpdp = cpu_ldq_data(env, ptr + 16);
+    } else
+#endif
+    {
+        if (data32) {
+            /* 32 bit */
+            env->fpip = cpu_ldl_data(env, ptr + 8);
+            env->fpdp = cpu_ldl_data(env, ptr + 16);
+        } else {
+            /* 16 bit */
+            env->fpip = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 16);
+        }
+
+        env->fpcs = cpu_lduw_data(env, ptr + 12);
+        env->fpds = cpu_lduw_data(env, ptr + 20);
+    }
+
     addr = ptr + 0x20;
     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, addr);
@@ -1195,6 +1304,11 @@ void helper_fxrstor(CPUX86State *env, target_ulong
ptr, int data64)
             }
         }
     }
+
+    if (!data64) {
+        env->fpip &= 0xffffffff;
+        env->fpdp &= 0xffffffff;
+    }
 }

 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 8eb0145..9c4fd22 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -183,12 +183,12 @@ DEF_HELPER_1(frndint, void, env)
 DEF_HELPER_1(fscale, void, env)
 DEF_HELPER_1(fsin, void, env)
 DEF_HELPER_1(fcos, void, env)
-DEF_HELPER_3(fstenv, void, env, tl, int)
-DEF_HELPER_3(fldenv, void, env, tl, int)
-DEF_HELPER_3(fsave, void, env, tl, int)
-DEF_HELPER_3(frstor, void, env, tl, int)
-DEF_HELPER_3(fxsave, void, env, tl, int)
-DEF_HELPER_3(fxrstor, void, env, tl, int)
+DEF_HELPER_4(fstenv, void, env, tl, int, int)
+DEF_HELPER_4(fldenv, void, env, tl, int, int)
+DEF_HELPER_4(fsave, void, env, tl, int, int)
+DEF_HELPER_4(frstor, void, env, tl, int, int)
+DEF_HELPER_4(fxsave, void, env, tl, int, int)
+DEF_HELPER_4(fxrstor, void, env, tl, int, int)

 DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 16d2f6a..500f04f 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT16(env.fpop, X86CPU),
+        VMSTATE_UINT32(env.fpop, X86CPU),
         VMSTATE_UINT64(env.fpip, X86CPU),
         VMSTATE_UINT64(env.fpdp, X86CPU),
         VMSTATE_END_OF_LIST()
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6fcd824..8e490de 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -58,6 +58,9 @@
 #endif

 //#define MACRO_TEST   1
+#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)
+#define FP_EP_VALID 0x80000000
+#define FP_EP_INVALID 0

 /* global register indexes */
 static TCGv_ptr cpu_env;
@@ -65,6 +68,11 @@ static TCGv cpu_A0;
 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
+static TCGv_i32 cpu_fpop;
+static TCGv cpu_fpip;
+static TCGv cpu_fpdp;
+static TCGv_i32 cpu_fpds;
+static TCGv_i32 cpu_fpcs;
 /* local temps */
 static TCGv cpu_T[2];
 /* local register indexes (only used inside old micro ops) */
@@ -74,6 +82,9 @@ static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;

 static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
+static uint16_t gen_opc_fp_op[OPC_BUF_SIZE];
+static uint16_t gen_opc_fp_cs[OPC_BUF_SIZE];
+static target_ulong gen_opc_fp_ip[OPC_BUF_SIZE];

 #include "exec/gen-icount.h"

@@ -104,6 +115,10 @@ typedef struct DisasContext {
     int ss32;   /* 32 bit stack segment */
     CCOp cc_op;  /* current CC operation */
     bool cc_op_dirty;
+    uint16_t fp_op;
+    bool fp_ep_dirty;
+    target_ulong fp_ip;
+    uint16_t fp_cs;
     int addseg; /* non zero if either DS/ES/SS have a non zero base */
     int f_st;   /* currently unused */
     int vm86;   /* vm86 mode */
@@ -208,6 +223,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_CLR] = 0,
 };

+static inline bool instr_is_x87_nc(int modrm, int b)
+{
+    int op, mod, rm;
+    switch (b) {
+    case 0xd8 ... 0xdf:
+        /* floats */
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        if (mod != 3) {
+            /* memory */
+            switch (op) {
+            case 0x0c: /* fldenv */
+            case 0x0d: /* fldcw */
+            case 0x0e: /* fstenv, fnstenv */
+            case 0x0f: /* fstcw, fnstcw */
+            case 0x2c: /* frstor */
+            case 0x2e: /* fsave, fnsave */
+            case 0x2f: /* fstsw, fnstsw */
+                return false;
+            default:
+                return true;
+            }
+        } else {
+            /* register */
+            switch (op) {
+            case 0x0a:
+                return false; /* fnop, Illegal op */
+            case 0x0e: /* fdecstp, fincstp */
+            case 0x28: /* ffree */
+                return false;
+            case 0x1c:
+                switch (rm) {
+                case 1: /* feni */
+                    return true;
+                case 2: /* fclex, fnclex */
+                case 3: /* finit, fninit */
+                    return false;
+                case 4: /* fsetpm */
+                    return true;
+                default: /* Illegal op */
+                    return false;
+                }
+            case 0x3c:
+                return false; /* fstsw, fnstsw, Illegal op */
+            default:
+                return true;
+            }
+        }
+    /*case 0x9b: // fwait, wait
+        return false;*/
+    default:
+        return false;
+    }
+}
+
 static void set_cc_op(DisasContext *s, CCOp op)
 {
     int dead;
@@ -253,6 +324,23 @@ static void gen_update_cc_op(DisasContext *s)
     }
 }

+static void set_ep(DisasContext *s, int fp_op, int fp_ip, int fp_cs) {
+    s->fp_op = FP_EP_VALID | fp_op;
+    s->fp_ip = fp_ip;
+    s->fp_cs = fp_cs;
+    s->fp_ep_dirty = true;
+}
+
+static void gen_update_ep(DisasContext *s)
+{
+    if (s->fp_ep_dirty) {
+        tcg_gen_movi_i32(cpu_fpop, s->fp_op);
+        tcg_gen_movi_tl(cpu_fpip, s->fp_ip);
+        tcg_gen_movi_i32(cpu_fpcs, s->fp_cs);
+        s->fp_ep_dirty = false;
+    }
+}
+
 #ifdef TARGET_X86_64

 #define NB_OP_SIZES 4
@@ -666,6 +754,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot,
target_ulong cur_eip,
     state_saved = 0;
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(cur_eip);
         state_saved = 1;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -686,6 +775,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot,
target_ulong cur_eip,
     if(s->flags & HF_SVMI_MASK) {
         if (!state_saved) {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(cur_eip);
         }
         svm_flags |= (1 << (4 + ot));
@@ -1097,6 +1187,7 @@ static inline void gen_jcc1(DisasContext *s, int b,
int l1)
     CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);

     gen_update_cc_op(s);
+    gen_update_ep(s);
     if (cc.mask != -1) {
         tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
         cc.reg = cpu_T[0];
@@ -1580,14 +1671,14 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp
ot, int op1, int is_right)
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
-    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
+    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
     tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                         cpu_tmp2_i32, cpu_tmp3_i32);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);

-    /* The CC_OP value is no longer predictable.  */
+    /* The CC_OP value is no longer predictable.  */
     set_cc_op(s, CC_OP_DYNAMIC);
 }

@@ -1863,7 +1954,7 @@ static void gen_shifti(DisasContext *s1, int op,
TCGMemOp ot, int d, int c)
     }
 }

-static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm,
int b)
 {
     target_long disp;
     int havesib;
@@ -1871,6 +1962,7 @@ static void gen_lea_modrm(CPUX86State *env,
DisasContext *s, int modrm)
     int index;
     int scale;
     int mod, rm, code, override, must_add_seg;
+    int curr_instr_is_x87_nc;
     TCGv sum;

     override = s->override;
@@ -1950,6 +2042,13 @@ static void gen_lea_modrm(CPUX86State *env,
DisasContext *s, int modrm)
             tcg_gen_addi_tl(cpu_A0, sum, disp);
         }

+        curr_instr_is_x87_nc = instr_is_x87_nc(modrm, b);
+        if (curr_instr_is_x87_nc) {
+            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+            if (s->aflag == MO_32) {
+                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
+            }
+        }
         if (must_add_seg) {
             if (override < 0) {
                 if (base == R_EBP || base == R_ESP) {
@@ -1961,6 +2060,12 @@ static void gen_lea_modrm(CPUX86State *env,
DisasContext *s, int modrm)

             tcg_gen_ld_tl(cpu_tmp0, cpu_env,
                           offsetof(CPUX86State, segs[override].base));
+
+            if (curr_instr_is_x87_nc) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State,
segs[override].selector));
+            }
+
             if (CODE64(s)) {
                 if (s->aflag == MO_32) {
                     tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
@@ -1970,6 +2075,11 @@ static void gen_lea_modrm(CPUX86State *env,
DisasContext *s, int modrm)
             }

             tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+        } else {
+            if (curr_instr_is_x87_nc) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }

         if (s->aflag == MO_32) {
@@ -2039,8 +2149,22 @@ static void gen_lea_modrm(CPUX86State *env,
DisasContext *s, int modrm)
                     override = R_DS;
                 }
             }
+            if (instr_is_x87_nc(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State,
segs[override].selector));
+            }
             gen_op_addl_A0_seg(s, override);
+        } else {
+            if (instr_is_x87_nc(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }
+#ifdef TARGET_X86_64
+        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
+#endif
         break;

     default:
@@ -2130,7 +2254,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
    OR_TMP0 */
 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
-                           TCGMemOp ot, int reg, int is_store)
+                           TCGMemOp ot, int reg, int is_store, int b)
 {
     int mod, rm;

@@ -2147,7 +2271,7 @@ static void gen_ldst_modrm(CPUX86State *env,
DisasContext *s, int modrm,
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         }
     } else {
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_v_reg(ot, cpu_T[0], reg);
@@ -2250,7 +2374,7 @@ static void gen_cmovcc1(CPUX86State *env,
DisasContext *s, TCGMemOp ot, int b,
 {
     CCPrepare cc;

-    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

     cc = gen_prepare_cc(s, b, cpu_T[1]);
     if (cc.mask != -1) {
@@ -2297,6 +2421,7 @@ static void gen_movl_seg_T0(DisasContext *s, int
seg_reg, target_ulong cur_eip)
     if (s->pe && !s->vm86) {
         /* XXX: optimize by finding processor state dynamically */
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(cur_eip);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
@@ -2326,6 +2451,7 @@ gen_svm_check_intercept_param(DisasContext *s,
target_ulong pc_start,
     if (likely(!(s->flags & HF_SVMI_MASK)))
         return;
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(pc_start - s->cs_base);
     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
                                          tcg_const_i64(param));
@@ -2513,6 +2639,7 @@ static void gen_enter(DisasContext *s, int
esp_addend, int level)
 static void gen_exception(DisasContext *s, int trapno, target_ulong
cur_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
     s->is_jmp = DISAS_TB_JUMP;
@@ -2524,6 +2651,7 @@ static void gen_interrupt(DisasContext *s, int intno,
                           target_ulong cur_eip, target_ulong next_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
@@ -2533,6 +2661,7 @@ static void gen_interrupt(DisasContext *s, int intno,
 static void gen_debug(DisasContext *s, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_debug(cpu_env);
     s->is_jmp = DISAS_TB_JUMP;
@@ -2543,6 +2672,7 @@ static void gen_debug(DisasContext *s, target_ulong
cur_eip)
 static void gen_eob(DisasContext *s)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
         gen_helper_reset_inhibit_irq(cpu_env);
     }
@@ -2564,6 +2694,7 @@ static void gen_eob(DisasContext *s)
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     set_cc_op(s, CC_OP_DYNAMIC);
     if (s->jmp_opt) {
         gen_goto_tb(s, tb_num, eip);
@@ -3043,7 +3174,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x0e7: /* movntq */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             break;
         case 0x1e7: /* movntdq */
@@ -3051,20 +3182,20 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x12b: /* movntps */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x22b: /* movntss */
         case 0x32b: /* movntsd */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (b1 & 1) {
                 gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
@@ -3076,13 +3207,13 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
                 tcg_gen_st_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
@@ -3091,15 +3222,15 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
@@ -3107,7 +3238,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3124,7 +3255,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x16f: /* movdqa xmm, ea */
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3134,7 +3265,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3149,7 +3280,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3164,7 +3295,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x012: /* movlps */
         case 0x112: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3176,7 +3307,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             break;
         case 0x212: /* movsldup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3192,7 +3323,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3206,7 +3337,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3218,7 +3349,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             break;
         case 0x216: /* movshdup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3256,34 +3387,34 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3295,7 +3426,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3310,7 +3441,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x17f: /* movdqa ea, xmm */
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3320,7 +3451,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
             } else {
@@ -3331,7 +3462,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3343,7 +3474,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x013: /* movlps */
         case 0x113: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3353,7 +3484,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x017: /* movhps */
         case 0x117: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3417,7 +3548,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x12a: /* cvtpi2pd */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -3440,7 +3571,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x22a: /* cvtsi2ss */
         case 0x32a: /* cvtsi2sd */
             ot = mo_64_32(s->dflag);
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
@@ -3462,7 +3593,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x12d: /* cvtpd2pi */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 gen_ldo_env_A0(s, op2_offset);
             } else {
@@ -3493,7 +3624,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0x32d: /* cvtsd2si */
             ot = mo_64_32(s->dflag);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
                 } else {
@@ -3525,7 +3656,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         case 0xc4: /* pinsrw */
         case 0x1c4:
             s->rip_offset = 1;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
@@ -3559,7 +3690,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3626,7 +3757,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     switch (b) {
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
@@ -3660,7 +3791,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -3701,7 +3832,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                 }

                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                                  cpu_T[0], tcg_const_i32(8 << ot));

@@ -3729,7 +3860,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     ot = MO_64;
                 }

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b & 1) == 0) {
                     tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
                                        s->mem_index, ot | MO_BE);
@@ -3747,7 +3878,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 gen_op_update1_cc();
@@ -3764,7 +3895,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                 {
                     TCGv bound, zero;

-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
                     tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
@@ -3801,7 +3932,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
@@ -3828,7 +3959,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 switch (ot) {
                 default:
                     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3854,7 +3985,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3872,7 +4003,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3892,7 +4023,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     int end_op;

                     ot = mo_64_32(s->dflag);
-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                     /* Re-use the carry-out from a previous round.  */
                     TCGV_UNUSED(carry_in);
@@ -3971,7 +4102,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 if (ot == MO_64) {
                     tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
                 } else {
@@ -4003,7 +4134,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
@@ -4062,7 +4193,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                 ot = mo_64_32(s->dflag);
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3)
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
@@ -4199,7 +4330,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldo_env_A0(s, op2_offset);
                 }
             } else {
@@ -4208,7 +4339,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -4242,7 +4373,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 b = cpu_ldub_code(env, s->pc++);
                 if (ot == MO_64) {
                     tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
@@ -4278,7 +4409,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             if (mod != 3) {
                 int sz = 4;

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);

                 switch (b) {
@@ -4326,7 +4457,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         } else {
             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -4404,6 +4535,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     int modrm, reg, rm, mod, op, opreg, val;
     target_ulong next_eip, tval;
     int rex_w, rex_r;
+    int fp_op, fp_ip, fp_cs;

     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(pc_start);
@@ -4595,7 +4727,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     opreg = OR_TMP0;
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
@@ -4616,7 +4748,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
@@ -4655,7 +4787,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     s->rip_offset = 1;
                 else
                     s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = rm;
@@ -4698,7 +4830,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (mod != 3) {
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -4906,7 +5038,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             }
         }
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
@@ -4946,6 +5078,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         do_lcall:
             if (s->pe && !s->vm86) {
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4973,6 +5106,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         do_ljmp:
             if (s->pe && !s->vm86) {
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4998,7 +5132,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_v_reg(ot, cpu_T[1], reg);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
@@ -5073,7 +5207,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             s->rip_offset = insn_const_size(ot);
         else if (b == 0x6b)
             s->rip_offset = 1;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
             tcg_gen_movi_tl(cpu_T[1], val);
@@ -5130,7 +5264,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
             tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -5159,7 +5293,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_mov_tl(a0, cpu_A0);
                 gen_op_ld_v(s, ot, t0, a0);
                 rm = 0; /* avoid warning */
@@ -5207,16 +5341,16 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
-#endif
+#endif
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5266,7 +5400,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             s->popl_esp_hack = 0;
             gen_pop_update(s, ot);
         }
@@ -5352,7 +5486,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         reg = ((modrm >> 3) & 7) | rex_r;

         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
@@ -5361,7 +5495,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
         }
         val = insn_get(env, s, ot);
         tcg_gen_movi_tl(cpu_T[0], val);
@@ -5377,7 +5511,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         break;
     case 0x8e: /* mov seg, Gv */
@@ -5385,7 +5519,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
-        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace */
@@ -5408,7 +5542,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             goto illegal_op;
         gen_op_movl_T0_seg(reg);
         ot = mod == 3 ? dflag : MO_16;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
         break;

     case 0x1b6: /* movzbS Gv, Eb */
@@ -5450,7 +5584,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -5468,7 +5602,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         s->override = -1;
         val = s->addseg;
         s->addseg = 0;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         s->addseg = val;
         gen_op_mov_reg_v(ot, reg, cpu_A0);
         break;
@@ -5558,7 +5692,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
@@ -5593,7 +5727,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
@@ -5624,7 +5758,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 if (shift == 2) {
                     s->rip_offset = 1;
                 }
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = (modrm & 7) | REX_B(s);
@@ -5674,7 +5808,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             opreg = OR_TMP0;
         } else {
             opreg = rm;
@@ -5705,7 +5839,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -5832,7 +5966,9 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 0x0c: /* fldenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fldenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0d: /* fldcw mem */
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
@@ -5841,8 +5977,11 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 break;
             case 0x0e: /* fnstenv mem */
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fstenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
@@ -5863,12 +6002,17 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 0x2c: /* frstor mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_frstor(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2e: /* fnsave mem */
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fsave(cpu_env, cpu_A0,
+                                 tcg_const_i32(dflag == MO_32),
+                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
@@ -6209,6 +6353,12 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 goto illegal_op;
             }
         }
+        if (instr_is_x87_nc(modrm, b)) {
+            fp_op = ((b & 0x7) << 8) | (modrm & 0xff);
+            fp_ip = pc_start - s->cs_base;
+            fp_cs = env->segs[R_CS].selector;
+            set_ep(s, fp_op, fp_ip, fp_cs);
+        }
         break;
         /************************/
         /* string ops */
@@ -6393,6 +6543,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     do_lret:
         if (s->pe && !s->vm86) {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(val));
@@ -6430,6 +6581,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             }
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
@@ -6527,7 +6679,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = cpu_ldub_code(env, s->pc++);
         gen_setcc1(s, b, cpu_T[0]);
-        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         if (!(s->cpuid_features & CPUID_CMOV)) {
@@ -6657,7 +6809,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
             s->rip_offset = 1;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -6688,7 +6840,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* specific case: we need to add a displacement */
             gen_exts(ot, cpu_T[1]);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
@@ -6764,7 +6916,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_extu(ot, cpu_T[0]);

         /* Note that lzcnt and tzcnt are in different extensions.  */
@@ -6884,6 +7036,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_fwait(cpu_env);
         }
@@ -6903,6 +7056,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (CODE64(s))
             goto illegal_op;
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
         break;
@@ -6967,7 +7121,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (mod == 3)
             goto illegal_op;
         gen_op_mov_v_reg(ot, cpu_T[0], reg);
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         if (ot == MO_16) {
@@ -7095,6 +7249,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
             gen_eob(s);
@@ -7104,6 +7259,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     case 0x105: /* syscall */
         /* XXX: is it usable in real mode ? */
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
         gen_eob(s);
@@ -7113,6 +7269,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
             /* condition codes are modified only in long mode */
@@ -7133,6 +7290,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->is_jmp = DISAS_TB_JUMP;
@@ -7149,7 +7307,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 2: /* lldt */
             if (!s->pe || s->vm86)
@@ -7158,7 +7316,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
@@ -7170,7 +7328,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,tr.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 3: /* ltr */
             if (!s->pe || s->vm86)
@@ -7179,7 +7337,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
@@ -7189,7 +7347,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         case 5: /* verw */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             gen_update_cc_op(s);
             if (op == 4) {
                 gen_helper_verr(cpu_env, cpu_T[0]);
@@ -7212,7 +7370,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             if (mod == 3)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
gdt.limit));
             gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
             gen_add_A0_im(s, 2);
@@ -7241,6 +7399,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                         s->cpl != 0)
                         goto illegal_op;
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     gen_helper_mwait(cpu_env, tcg_const_i32(s->pc -
pc_start));
                     gen_eob(s);
@@ -7268,7 +7427,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
             } else { /* sidt */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
idt.limit));
                 gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
                 gen_add_A0_im(s, 2);
@@ -7371,7 +7530,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             } else {
                 gen_svm_check_intercept(s, pc_start,
                                         op==2 ? SVM_EXIT_GDTR_WRITE :
SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
                 gen_add_A0_im(s, 2);
                 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
@@ -7394,14 +7553,14 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
 #else
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,cr[0]));
 #endif
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
             break;
         case 6: /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_helper_lmsw(cpu_env, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
@@ -7413,8 +7572,9 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 } else {
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_helper_invlpg(cpu_env, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
@@ -7446,6 +7606,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
                         goto illegal_op;
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     if (use_icount)
                         gen_io_start();
@@ -7493,7 +7654,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -7514,7 +7675,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, ot, t0, cpu_A0);
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, cpu_A0);
@@ -7556,7 +7717,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             ot = dflag != MO_16 ? MO_32 : MO_16;
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
@@ -7584,7 +7745,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
@@ -7624,6 +7785,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 4:
             case 8:
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 if (b & 2) {
                     gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -7696,7 +7858,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             goto illegal_op;
         reg = ((modrm >> 3) & 7) | rex_r;
         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0x1ae:
         modrm = cpu_ldub_code(env, s->pc++);
@@ -7704,6 +7866,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         op = (modrm >> 3) & 7;
         switch(op) {
         case 0: /* fxsave */
+            gen_update_ep(s);
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
                 (s->prefix & PREFIX_LOCK))
                 goto illegal_op;
@@ -7711,10 +7874,13 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag ==
MO_64));
+            gen_helper_fxsave(cpu_env, cpu_A0,
+                              tcg_const_i32(dflag == MO_32),
+                              tcg_const_i32(dflag == MO_64));
             break;
         case 1: /* fxrstor */
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
@@ -7724,10 +7890,12 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag ==
MO_64));
+            gen_helper_fxrstor(cpu_env, cpu_A0,
+                               tcg_const_i32(dflag == MO_32),
+                               tcg_const_i32(dflag == MO_64));
             break;
         case 2: /* ldmxcsr */
         case 3: /* stmxcsr */
@@ -7738,7 +7906,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                 mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op == 2) {
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                     s->mem_index, MO_LEUL);
@@ -7763,7 +7931,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 /* clflush */
                 if (!(s->cpuid_features & CPUID_CLFLUSH))
                     goto illegal_op;
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
             }
             break;
         default:
@@ -7775,7 +7943,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
@@ -7783,6 +7951,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (!(s->flags & HF_SMM_MASK))
             goto illegal_op;
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(s->pc - s->cs_base);
         gen_helper_rsm(cpu_env);
         gen_eob(s);
@@ -7803,7 +7972,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             ot = mo_64_32(dflag);
         }

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);

@@ -7880,6 +8049,17 @@ void optimize_flags_init(void)
     cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State,
cc_src2),
                                      "cc_src2");

+    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
+                                      offsetof(CPUX86State, fpop), "fpop");
+    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
+                                     "fpip");
+    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
+                                     "fpdp");
+    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State,
fpds),
+                                     "fpds");
+    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State,
fpcs),
+                                     "fpcs");
+
     for (i = 0; i < CPU_NB_REGS; ++i) {
         cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
                                          offsetof(CPUX86State, regs[i]),
@@ -7924,6 +8104,8 @@ static inline void
gen_intermediate_code_internal(X86CPU *cpu,
     dc->singlestep_enabled = cs->singlestep_enabled;
     dc->cc_op = CC_OP_DYNAMIC;
     dc->cc_op_dirty = false;
+    dc->fp_op = FP_EP_INVALID;
+    dc->fp_ep_dirty = false;
     dc->cs_base = cs_base;
     dc->tb = tb;
     dc->popl_esp_hack = 0;
@@ -7997,6 +8179,9 @@ static inline void
gen_intermediate_code_internal(X86CPU *cpu,
             }
             tcg_ctx.gen_opc_pc[lj] = pc_ptr;
             gen_opc_cc_op[lj] = dc->cc_op;
+            gen_opc_fp_op[lj] = dc->fp_op;
+            gen_opc_fp_ip[lj] = dc->fp_ip;
+            gen_opc_fp_cs[lj] = dc->fp_cs;
             tcg_ctx.gen_opc_instr_start[lj] = 1;
             tcg_ctx.gen_opc_icount[lj] = num_insns;
         }
@@ -8080,6 +8265,7 @@ void gen_intermediate_code_pc(CPUX86State *env,
TranslationBlock *tb)
 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int
pc_pos)
 {
     int cc_op;
+    uint16_t fp_op;
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
         int i;
@@ -8099,4 +8285,10 @@ void restore_state_to_opc(CPUX86State *env,
TranslationBlock *tb, int pc_pos)
     cc_op = gen_opc_cc_op[pc_pos];
     if (cc_op != CC_OP_DYNAMIC)
         env->cc_op = cc_op;
+    fp_op = gen_opc_fp_op[pc_pos];
+    if (fp_op & FP_EP_VALID) {
+        tcg_gen_movi_i32(cpu_fpop, fp_op);
+        tcg_gen_movi_tl(cpu_fpip, gen_opc_fp_ip[pc_pos]);
+        tcg_gen_movi_i32(cpu_fpcs, gen_opc_fp_cs[pc_pos]);
+    }
 }



On Sat, Jul 19, 2014 at 2:36 AM, Jaume Martí <jaume.martif@gmail.com> wrote:

> Hello,
>
> I attach a patch with the fix for the issues pointed out by Richard.
> Maybe it would be useful to have the option to disable this feature
> at compile time, for performance reasons.
> Please review and apply.
>
> Best regards,
> Jaume
>
> Signed-off-by: Jaume Marti Farriol (jaume.martif@gmail.com)
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index 1141054..73f8f6b 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -865,7 +865,7 @@ static void setup_sigcontext(struct target_sigcontext
> *sc,
>      __put_user(env->regs[R_ESP], &sc->esp_at_signal);
>      __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);
>
> -        cpu_x86_fsave(env, fpstate_addr, 1);
> +        cpu_x86_fsave(env, fpstate_addr);
>          fpstate->status = fpstate->sw;
>          magic = 0xffff;
>      __put_user(magic, &fpstate->magic);
> @@ -1068,7 +1068,7 @@ restore_sigcontext(CPUX86State *env, struct
> target_sigcontext *sc, int *peax)
>                  if (!access_ok(VERIFY_READ, fpstate_addr,
>                                 sizeof(struct target_fpstate)))
>                          goto badframe;
> -                cpu_x86_frstor(env, fpstate_addr, 1);
> +                cpu_x86_frstor(env, fpstate_addr);
>   }
>
>          *peax = tswapl(sc->eax);
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index e634d83..4274ce3 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -819,10 +819,11 @@ typedef struct CPUX86State {
>      uint16_t fpuc;
>      uint8_t fptags[8];   /* 0 = valid, 1 = empty */
>      FPReg fpregs[8];
> -    /* KVM-only so far */
> -    uint16_t fpop;
> +    uint32_t fpop;
>      uint64_t fpip;
>      uint64_t fpdp;
> +    uint32_t fpcs;
> +    uint32_t fpds;
>
>      /* emulator internal variables */
>      float_status fp_status;
> @@ -1067,8 +1068,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
>  /* the following helpers are only usable in user mode simulation as
>     they can trigger unexpected exceptions */
>  void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
> -void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
> -void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
> +void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
> +void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);
>
>  /* you can call this signal handler from your SIGBUS and SIGSEGV
>     signal handlers to inform the virtual CPU of exceptions. non zero
> diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
> index 1b2900d..6886031 100644
> --- a/target-i386/fpu_helper.c
> +++ b/target-i386/fpu_helper.c
> @@ -56,6 +56,8 @@
>  #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
>  #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
>
> +#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
> +
>  static inline void fpush(CPUX86State *env)
>  {
>      env->fpstt = (env->fpstt - 1) & 7;
> @@ -604,6 +606,10 @@ void helper_fninit(CPUX86State *env)
>      env->fptags[5] = 1;
>      env->fptags[6] = 1;
>      env->fptags[7] = 1;
> +    env->fpip = 0;
> +    env->fpcs = 0;
> +    env->fpdp = 0;
> +    env->fpds = 0;
>  }
>
>  /* BCD ops */
> @@ -961,13 +967,13 @@ void helper_fxam_ST0(CPUX86State *env)
>      }
>  }
>
> -void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
> -    int fpus, fptag, exp, i;
> +    int fptag, exp, i;
>      uint64_t mant;
>      CPU_LDoubleU tmp;
>
> -    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
>      fptag = 0;
>      for (i = 7; i >= 0; i--) {
>          fptag <<= 2;
> @@ -987,83 +993,150 @@ void helper_fstenv(CPUX86State *env,
> target_ulong ptr, int data32)
>              }
>          }
>      }
> +
>      if (data32) {
>          /* 32 bit */
> -        cpu_stl_data(env, ptr, env->fpuc);
> -        cpu_stl_data(env, ptr + 4, fpus);
> -        cpu_stl_data(env, ptr + 8, fptag);
> -        cpu_stl_data(env, ptr + 12, 0); /* fpip */
> -        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
> -        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
> -        cpu_stl_data(env, ptr + 24, 0); /* fpos */
> +        cpu_stw_data(env, ptr, env->fpuc);
> +        cpu_stw_data(env, ptr + 4, FPUS(env));
> +        cpu_stw_data(env, ptr + 8, fptag);
> +        if (protected_mode) {
> +            cpu_stl_data(env, ptr + 12, env->fpip);
> +            cpu_stl_data(env, ptr + 16,
> +                        ((env->fpop & 0x7ff) << 16) | (env->fpcs &
> 0xffff));
> +            cpu_stl_data(env, ptr + 20, env->fpdp);
> +            cpu_stl_data(env, ptr + 24, env->fpds);
> +        } else {
> +            /* Real mode  */
> +            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
> +            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) <<
> 12) |
> +                        (env->fpop & 0x7ff))); /* fpip[31..16], fpop */
> +            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
> +            cpu_stl_data(env, ptr + 24,
> +                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
> +        }
>      } else {
>          /* 16 bit */
>          cpu_stw_data(env, ptr, env->fpuc);
> -        cpu_stw_data(env, ptr + 2, fpus);
> +        cpu_stw_data(env, ptr + 2, FPUS(env));
>          cpu_stw_data(env, ptr + 4, fptag);
> -        cpu_stw_data(env, ptr + 6, 0);
> -        cpu_stw_data(env, ptr + 8, 0);
> -        cpu_stw_data(env, ptr + 10, 0);
> -        cpu_stw_data(env, ptr + 12, 0);
> +        if (protected_mode) {
> +            cpu_stw_data(env, ptr + 6, env->fpip);
> +            cpu_stw_data(env, ptr + 8, env->fpcs);
> +            cpu_stw_data(env, ptr + 10, env->fpdp);
> +            cpu_stw_data(env, ptr + 12, env->fpds);
> +        } else {
> +            /* Real mode  */
> +            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
> +            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
> +                        (env->fpop & 0x7ff)); /* fpip[19..16], fpop */
> +            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
> +            cpu_stw_data(env, ptr + 12,
> +                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
> +        }
>      }
> +
> +    env->fpip = 0;
> +    env->fpcs = 0;
> +    env->fpdp = 0;
> +    env->fpds = 0;
>  }
>
> -void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
> -    int i, fpus, fptag;
> +    int tmp, i, fpus, fptag;
>
>      if (data32) {
> +        /* 32 bit */
>          env->fpuc = cpu_lduw_data(env, ptr);
>          fpus = cpu_lduw_data(env, ptr + 4);
>          fptag = cpu_lduw_data(env, ptr + 8);
> +        if (protected_mode) {
> +            env->fpip = cpu_ldl_data(env, ptr + 12);
> +            tmp = cpu_ldl_data(env, ptr + 16);
> +            env->fpcs = tmp & 0xffff;
> +            env->fpop = tmp >> 16;
> +            env->fpdp = cpu_ldl_data(env, ptr + 20);
> +            env->fpds = cpu_lduw_data(env, ptr + 24);
> +        } else {
> +            /* Real mode */
> +            tmp = cpu_ldl_data(env, ptr + 16);
> +            env->fpip = ((tmp & 0xffff000) << 4) |
> +                        cpu_lduw_data(env, ptr + 12);
> +            env->fpop = tmp & 0x7ff;
> +            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
> +                        cpu_lduw_data(env, ptr + 20);
> +        }
>      } else {
> +        /* 16 bit */
>          env->fpuc = cpu_lduw_data(env, ptr);
>          fpus = cpu_lduw_data(env, ptr + 2);
>          fptag = cpu_lduw_data(env, ptr + 4);
> +        if (protected_mode) {
> +            /* Protected mode  */
> +            env->fpip = cpu_lduw_data(env, ptr + 6);
> +            env->fpcs = cpu_lduw_data(env, ptr + 8);
> +            env->fpdp = cpu_lduw_data(env, ptr + 10);
> +            env->fpds = cpu_lduw_data(env, ptr + 12);
> +        } else {
> +            /* Real mode  */
> +            tmp = cpu_lduw_data(env, ptr + 8);
> +            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr +
> 6);
> +            env->fpop = tmp & 0x7ff;
> +            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
> +                        cpu_lduw_data(env, ptr + 10);
> +        }
>      }
> +
>      env->fpstt = (fpus >> 11) & 7;
>      env->fpus = fpus & ~0x3800;
>      for (i = 0; i < 8; i++) {
>          env->fptags[i] = ((fptag & 3) == 3);
>          fptag >>= 2;
>      }
> +
> +    env->fpip &= 0xffffffff;
> +    env->fpdp &= 0xffffffff;
> +    if (!protected_mode) {
> +        env->fpcs = 0;
> +        env->fpds = 0;
> +    }
>  }
>
> -void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
> +                  int protected_mode)
>  {
>      floatx80 tmp;
>      int i;
>
> -    helper_fstenv(env, ptr, data32);
> +    helper_fstenv(env, ptr, data32, protected_mode);
>
> -    ptr += (14 << data32);
> +    if (data32) {
> +        ptr += 28;
> +    } else {
> +        ptr += 14;
> +    }
>      for (i = 0; i < 8; i++) {
>          tmp = ST(i);
>          helper_fstt(env, tmp, ptr);
>          ptr += 10;
>      }
>
> -    /* fninit */
> -    env->fpus = 0;
> -    env->fpstt = 0;
> -    env->fpuc = 0x37f;
> -    env->fptags[0] = 1;
> -    env->fptags[1] = 1;
> -    env->fptags[2] = 1;
> -    env->fptags[3] = 1;
> -    env->fptags[4] = 1;
> -    env->fptags[5] = 1;
> -    env->fptags[6] = 1;
> -    env->fptags[7] = 1;
> +    helper_fninit(env);
>  }
>
> -void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
>      floatx80 tmp;
>      int i;
>
> -    helper_fldenv(env, ptr, data32);
> -    ptr += (14 << data32);
> +    helper_fldenv(env, ptr, data32, protected_mode);
> +    if (data32) {
> +        ptr += 28;
> +    } else {
> +        ptr += 14;
> +    }
>
>      for (i = 0; i < 8; i++) {
>          tmp = helper_fldt(env, ptr);
> @@ -1072,21 +1145,22 @@ void helper_frstor(CPUX86State *env,
> target_ulong ptr, int data32)
>      }
>  }
>
> -#if defined(CONFIG_USER_ONLY)
> -void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
> +#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS
> == 32
> +
> +void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
>  {
> -    helper_fsave(env, ptr, data32);
> +    helper_fsave(env, ptr, 1, 1);
>  }
>
> -void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
> +void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
>  {
> -    helper_frstor(env, ptr, data32);
> +    helper_frstor(env, ptr, 1, 1);
>  }
>  #endif
>
> -void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
> +void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int
> data64)
>  {
> -    int fpus, fptag, i, nb_xmm_regs;
> +    int i, nb_xmm_regs, fptag;
>      floatx80 tmp;
>      target_ulong addr;
>
> @@ -1095,25 +1169,36 @@ void helper_fxsave(CPUX86State *env,
> target_ulong ptr, int data64)
>          raise_exception(env, EXCP0D_GPF);
>      }
>
> -    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
>      fptag = 0;
>      for (i = 0; i < 8; i++) {
>          fptag |= (env->fptags[i] << i);
>      }
> +    fptag ^= 0xff;
> +
>      cpu_stw_data(env, ptr, env->fpuc);
> -    cpu_stw_data(env, ptr + 2, fpus);
> -    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
> +    cpu_stw_data(env, ptr + 2, FPUS(env));
> +    cpu_stw_data(env, ptr + 4, fptag & 0xff);
> +    cpu_stw_data(env, ptr + 6, env->fpop);
> +
>  #ifdef TARGET_X86_64
>      if (data64) {
> -        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
> -        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
> +        /* 64 bit */
> +        cpu_stq_data(env, ptr + 8, env->fpip);
> +        cpu_stq_data(env, ptr + 16, env->fpdp);
>      } else
>  #endif
>      {
> -        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
> -        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
> -        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
> -        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
> +        if (data32) {
> +            /* 32 bit */
> +            cpu_stl_data(env, ptr + 8, env->fpip);
> +            cpu_stl_data(env, ptr + 16, env->fpdp);
> +        } else {
> +            /* 16 bit */
> +            cpu_stw_data(env, ptr + 8, env->fpip);
> +            cpu_stw_data(env, ptr + 16, env->fpdp);
> +        }
> +        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
> +        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
>      }
>
>      addr = ptr + 0x20;
> @@ -1146,7 +1231,7 @@ void helper_fxsave(CPUX86State *env,
> target_ulong ptr, int data64)
>      }
>  }
>
> -void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
> +void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int data64)
>  {
>      int i, fpus, fptag, nb_xmm_regs;
>      floatx80 tmp;
> @@ -1167,6 +1252,30 @@ void helper_fxrstor(CPUX86State *env,
> target_ulong ptr, int data64)
>          env->fptags[i] = ((fptag >> i) & 1);
>      }
>
> +    env->fpop = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
> +
> +#ifdef TARGET_X86_64
> +    if (data64) {
> +        /* 64 bit */
> +        env->fpip = cpu_ldq_data(env, ptr + 8);
> +        env->fpdp = cpu_ldq_data(env, ptr + 16);
> +    } else
> +#endif
> +    {
> +        if (data32) {
> +            /* 32 bit */
> +            env->fpip = cpu_ldl_data(env, ptr + 8);
> +            env->fpdp = cpu_ldl_data(env, ptr + 16);
> +        } else {
> +            /* 16 bit */
> +            env->fpip = cpu_lduw_data(env, ptr + 8);
> +            env->fpdp = cpu_lduw_data(env, ptr + 16);
> +        }
> +
> +        env->fpcs = cpu_lduw_data(env, ptr + 12);
> +        env->fpds = cpu_lduw_data(env, ptr + 20);
> +    }
> +
>      addr = ptr + 0x20;
>      for (i = 0; i < 8; i++) {
>          tmp = helper_fldt(env, addr);
> @@ -1195,6 +1304,11 @@ void helper_fxrstor(CPUX86State *env,
> target_ulong ptr, int data64)
>              }
>          }
>      }
> +
> +    if (!data64) {
> +        env->fpip &= 0xffffffff;
> +        env->fpdp &= 0xffffffff;
> +    }
>  }
>
>  void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
> diff --git a/target-i386/helper.h b/target-i386/helper.h
> index 8eb0145..9c4fd22 100644
> --- a/target-i386/helper.h
> +++ b/target-i386/helper.h
> @@ -183,12 +183,12 @@ DEF_HELPER_1(frndint, void, env)
>  DEF_HELPER_1(fscale, void, env)
>  DEF_HELPER_1(fsin, void, env)
>  DEF_HELPER_1(fcos, void, env)
> -DEF_HELPER_3(fstenv, void, env, tl, int)
> -DEF_HELPER_3(fldenv, void, env, tl, int)
> -DEF_HELPER_3(fsave, void, env, tl, int)
> -DEF_HELPER_3(frstor, void, env, tl, int)
> -DEF_HELPER_3(fxsave, void, env, tl, int)
> -DEF_HELPER_3(fxrstor, void, env, tl, int)
> +DEF_HELPER_4(fstenv, void, env, tl, int, int)
> +DEF_HELPER_4(fldenv, void, env, tl, int, int)
> +DEF_HELPER_4(fsave, void, env, tl, int, int)
> +DEF_HELPER_4(frstor, void, env, tl, int, int)
> +DEF_HELPER_4(fxsave, void, env, tl, int, int)
> +DEF_HELPER_4(fxrstor, void, env, tl, int, int)
>
>  DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
>  DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
> diff --git a/target-i386/machine.c b/target-i386/machine.c
> index 16d2f6a..500f04f 100644
> --- a/target-i386/machine.c
> +++ b/target-i386/machine.c
> @@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
>      .version_id = 1,
>      .minimum_version_id = 1,
>      .fields = (VMStateField[]) {
> -        VMSTATE_UINT16(env.fpop, X86CPU),
> +        VMSTATE_UINT32(env.fpop, X86CPU),
>          VMSTATE_UINT64(env.fpip, X86CPU),
>          VMSTATE_UINT64(env.fpdp, X86CPU),
>          VMSTATE_END_OF_LIST()
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 6fcd824..8e490de 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -58,6 +58,9 @@
>  #endif
>
>  //#define MACRO_TEST   1
> +#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)
> +#define FP_EP_VALID 0x80000000
> +#define FP_EP_INVALID 0
>
>  /* global register indexes */
>  static TCGv_ptr cpu_env;
> @@ -65,6 +68,11 @@ static TCGv cpu_A0;
>  static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
>  static TCGv_i32 cpu_cc_op;
>  static TCGv cpu_regs[CPU_NB_REGS];
> +static TCGv_i32 cpu_fpop;
> +static TCGv cpu_fpip;
> +static TCGv cpu_fpdp;
> +static TCGv_i32 cpu_fpds;
> +static TCGv_i32 cpu_fpcs;
>  /* local temps */
>  static TCGv cpu_T[2];
>  /* local register indexes (only used inside old micro ops) */
> @@ -74,6 +82,9 @@ static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
>  static TCGv_i64 cpu_tmp1_i64;
>
>  static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
> +static uint16_t gen_opc_fp_op[OPC_BUF_SIZE];
> +static uint16_t gen_opc_fp_cs[OPC_BUF_SIZE];
> +static target_ulong gen_opc_fp_ip[OPC_BUF_SIZE];
>
>  #include "exec/gen-icount.h"
>
> @@ -104,6 +115,10 @@ typedef struct DisasContext {
>      int ss32;   /* 32 bit stack segment */
>      CCOp cc_op;  /* current CC operation */
>      bool cc_op_dirty;
> +    uint16_t fp_op;
> +    bool fp_ep_dirty;
> +    target_ulong fp_ip;
> +    uint16_t fp_cs;
>      int addseg; /* non zero if either DS/ES/SS have a non zero base */
>      int f_st;   /* currently unused */
>      int vm86;   /* vm86 mode */
> @@ -208,6 +223,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
>      [CC_OP_CLR] = 0,
>  };
>
> +static inline bool instr_is_x87_nc(int modrm, int b)
> +{
> +    int op, mod, rm;
> +    switch (b) {
> +    case 0xd8 ... 0xdf:
> +        /* floats */
> +        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
> +        mod = (modrm >> 6) & 3;
> +        rm = modrm & 7;
> +        if (mod != 3) {
> +            /* memory */
> +            switch (op) {
> +            case 0x0c: /* fldenv */
> +            case 0x0d: /* fldcw */
> +            case 0x0e: /* fstenv, fnstenv */
> +            case 0x0f: /* fstcw, fnstcw */
> +            case 0x2c: /* frstor */
> +            case 0x2e: /* fsave, fnsave */
> +            case 0x2f: /* fstsw, fnstsw */
> +                return false;
> +            default:
> +                return true;
> +            }
> +        } else {
> +            /* register */
> +            switch (op) {
> +            case 0x0a:
> +                return false; /* fnop, Illegal op */
> +            case 0x0e: /* fdecstp, fincstp */
> +            case 0x28: /* ffree */
> +                return false;
> +            case 0x1c:
> +                switch (rm) {
> +                case 1: /* feni */
> +                    return true;
> +                case 2: /* fclex, fnclex */
> +                case 3: /* finit, fninit */
> +                    return false;
> +                case 4: /* fsetpm */
> +                    return true;
> +                default: /* Illegal op */
> +                    return false;
> +                }
> +            case 0x3c:
> +                return false; /* fstsw, fnstsw, Illegal op */
> +            default:
> +                return true;
> +            }
> +        }
> +    /*case 0x9b: // fwait, wait
> +        return false;*/
> +    default:
> +        return false;
> +    }
> +}
> +
>  static void set_cc_op(DisasContext *s, CCOp op)
>  {
>      int dead;
> @@ -253,6 +324,23 @@ static void gen_update_cc_op(DisasContext *s)
>      }
>  }
>
> +static void set_ep(DisasContext *s, int fp_op, int fp_ip, int fp_cs) {
> +    s->fp_op = FP_EP_VALID | fp_op;
> +    s->fp_ip = fp_ip;
> +    s->fp_cs = fp_cs;
> +    s->fp_ep_dirty = true;
> +}
> +
> +static void gen_update_ep(DisasContext *s)
> +{
> +    if (s->fp_ep_dirty) {
> +        tcg_gen_movi_i32(cpu_fpop, s->fp_op);
> +        tcg_gen_movi_tl(cpu_fpip, s->fp_ip);
> +        tcg_gen_movi_i32(cpu_fpcs, s->fp_cs);
> +        s->fp_ep_dirty = false;
> +    }
> +}
> +
>  #ifdef TARGET_X86_64
>
>  #define NB_OP_SIZES 4
> @@ -666,6 +754,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp
> ot, target_ulong cur_eip,
>      state_saved = 0;
>      if (s->pe && (s->cpl > s->iopl || s->vm86)) {
>          gen_update_cc_op(s);
> +        gen_update_ep(s);
>          gen_jmp_im(cur_eip);
>          state_saved = 1;
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> @@ -686,6 +775,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp
> ot, target_ulong cur_eip,
>      if(s->flags & HF_SVMI_MASK) {
>          if (!state_saved) {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(cur_eip);
>          }
>          svm_flags |= (1 << (4 + ot));
> @@ -1097,6 +1187,7 @@ static inline void gen_jcc1(DisasContext *s, int
> b, int l1)
>      CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
>
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      if (cc.mask != -1) {
>          tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
>          cc.reg = cpu_T[0];
> @@ -1580,14 +1671,14 @@ static void gen_rot_rm_T1(DisasContext *s,
> TCGMemOp ot, int op1, int is_right)
>      t0 = tcg_const_i32(0);
>      t1 = tcg_temp_new_i32();
>      tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
> -    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
> +    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
>      tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
>      tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
>                          cpu_tmp2_i32, cpu_tmp3_i32);
>      tcg_temp_free_i32(t0);
>      tcg_temp_free_i32(t1);
>
> -    /* The CC_OP value is no longer predictable.  */
> +    /* The CC_OP value is no longer predictable.  */
>      set_cc_op(s, CC_OP_DYNAMIC);
>  }
>
> @@ -1863,7 +1954,7 @@ static void gen_shifti(DisasContext *s1, int op,
> TCGMemOp ot, int d, int c)
>      }
>  }
>
> -static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
> +static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm, int b)
>  {
>      target_long disp;
>      int havesib;
> @@ -1871,6 +1962,7 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>      int index;
>      int scale;
>      int mod, rm, code, override, must_add_seg;
> +    int curr_instr_is_x87_nc;
>      TCGv sum;
>
>      override = s->override;
> @@ -1950,6 +2042,13 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>              tcg_gen_addi_tl(cpu_A0, sum, disp);
>          }
>
> +        curr_instr_is_x87_nc = instr_is_x87_nc(modrm, b);
> +        if (curr_instr_is_x87_nc) {
> +            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +            if (s->aflag == MO_32) {
> +                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
> +            }
> +        }
>          if (must_add_seg) {
>              if (override < 0) {
>                  if (base == R_EBP || base == R_ESP) {
> @@ -1961,6 +2060,12 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>
>              tcg_gen_ld_tl(cpu_tmp0, cpu_env,
>                            offsetof(CPUX86State, segs[override].base));
> +
> +            if (curr_instr_is_x87_nc) {
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[override].selector));
> +            }
> +
>              if (CODE64(s)) {
>                  if (s->aflag == MO_32) {
>                      tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
> @@ -1970,6 +2075,11 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>              }
>
>              tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> +        } else {
> +            if (curr_instr_is_x87_nc) {
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[R_DS].selector));
> +            }
>          }
>
>          if (s->aflag == MO_32) {
> @@ -2039,8 +2149,22 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>                      override = R_DS;
>                  }
>              }
> +            if (instr_is_x87_nc(modrm, b)) {
> +                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[override].selector));
> +            }
>              gen_op_addl_A0_seg(s, override);
> +        } else {
> +            if (instr_is_x87_nc(modrm, b)) {
> +                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[R_DS].selector));
> +            }
>          }
> +#ifdef TARGET_X86_64
> +        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
> +#endif
>          break;
>
>      default:
> @@ -2130,7 +2254,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
>  /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
>     OR_TMP0 */
>  static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
> -                           TCGMemOp ot, int reg, int is_store)
> +                           TCGMemOp ot, int reg, int is_store, int b)
>  {
>      int mod, rm;
>
> @@ -2147,7 +2271,7 @@ static void gen_ldst_modrm(CPUX86State *env,
> DisasContext *s, int modrm,
>                  gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>          }
>      } else {
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          if (is_store) {
>              if (reg != OR_TMP0)
>                  gen_op_mov_v_reg(ot, cpu_T[0], reg);
> @@ -2250,7 +2374,7 @@ static void gen_cmovcc1(CPUX86State *env,
> DisasContext *s, TCGMemOp ot, int b,
>  {
>      CCPrepare cc;
>
> -    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>      cc = gen_prepare_cc(s, b, cpu_T[1]);
>      if (cc.mask != -1) {
> @@ -2297,6 +2421,7 @@ static void gen_movl_seg_T0(DisasContext *s, int
> seg_reg, target_ulong cur_eip)
>      if (s->pe && !s->vm86) {
>          /* XXX: optimize by finding processor state dynamically */
>          gen_update_cc_op(s);
> +        gen_update_ep(s);
>          gen_jmp_im(cur_eip);
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>          gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
> @@ -2326,6 +2451,7 @@ gen_svm_check_intercept_param(DisasContext *s,
> target_ulong pc_start,
>      if (likely(!(s->flags & HF_SVMI_MASK)))
>          return;
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      gen_jmp_im(pc_start - s->cs_base);
>      gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
>                                           tcg_const_i64(param));
> @@ -2513,6 +2639,7 @@ static void gen_enter(DisasContext *s, int
> esp_addend, int level)
>  static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
>  {
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      gen_jmp_im(cur_eip);
>      gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
>      s->is_jmp = DISAS_TB_JUMP;
> @@ -2524,6 +2651,7 @@ static void gen_interrupt(DisasContext *s, int intno,
>                            target_ulong cur_eip, target_ulong next_eip)
>  {
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      gen_jmp_im(cur_eip);
>      gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
>                                 tcg_const_i32(next_eip - cur_eip));
> @@ -2533,6 +2661,7 @@ static void gen_interrupt(DisasContext *s, int intno,
>  static void gen_debug(DisasContext *s, target_ulong cur_eip)
>  {
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      gen_jmp_im(cur_eip);
>      gen_helper_debug(cpu_env);
>      s->is_jmp = DISAS_TB_JUMP;
> @@ -2543,6 +2672,7 @@ static void gen_debug(DisasContext *s,
> target_ulong cur_eip)
>  static void gen_eob(DisasContext *s)
>  {
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
>          gen_helper_reset_inhibit_irq(cpu_env);
>      }
> @@ -2564,6 +2694,7 @@ static void gen_eob(DisasContext *s)
>  static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
>  {
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      set_cc_op(s, CC_OP_DYNAMIC);
>      if (s->jmp_opt) {
>          gen_goto_tb(s, tb_num, eip);
> @@ -3043,7 +3174,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x0e7: /* movntq */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              break;
>          case 0x1e7: /* movntdq */
> @@ -3051,20 +3182,20 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12b: /* movntps */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              break;
>          case 0x3f0: /* lddqu */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              break;
>          case 0x22b: /* movntss */
>          case 0x32b: /* movntsd */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (b1 & 1) {
>                  gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
> @@ -3076,13 +3207,13 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x6e: /* movd mm, ea */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
>                  tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
>              } else
>  #endif
>              {
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,fpregs[reg].mmx));
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
> @@ -3091,15 +3222,15 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x16e: /* movd xmm, ea */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
>                  gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
>              } else
>  #endif
>              {
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
> @@ -3107,7 +3238,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x6f: /* movq mm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              } else {
>                  rm = (modrm & 7);
> @@ -3124,7 +3255,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x16f: /* movdqa xmm, ea */
>          case 0x26f: /* movdqu xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3134,7 +3265,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x210: /* movss xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
>                  tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
>                  tcg_gen_movi_tl(cpu_T[0], 0);
> @@ -3149,7 +3280,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x310: /* movsd xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>                  tcg_gen_movi_tl(cpu_T[0], 0);
> @@ -3164,7 +3295,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x012: /* movlps */
>          case 0x112: /* movlpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3176,7 +3307,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x212: /* movsldup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3192,7 +3323,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x312: /* movddup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3206,7 +3337,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x016: /* movhps */
>          case 0x116: /* movhpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(1)));
>              } else {
> @@ -3218,7 +3349,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x216: /* movshdup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3256,34 +3387,34 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x7e: /* movd ea, mm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T[0], cpu_env,
> +                tcg_gen_ld_i64(cpu_T[0], cpu_env,
>                                 offsetof(CPUX86State,fpregs[reg].mmx));
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> +                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
>                                   offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
>              }
>              break;
>          case 0x17e: /* movd ea, xmm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T[0], cpu_env,
> +                tcg_gen_ld_i64(cpu_T[0], cpu_env,
>                                 offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> +                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
>              }
>              break;
>          case 0x27e: /* movq xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3295,7 +3426,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x7f: /* movq ea, mm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              } else {
>                  rm = (modrm & 7);
> @@ -3310,7 +3441,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x17f: /* movdqa ea, xmm */
>          case 0x27f: /* movdqu ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3320,7 +3451,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x211: /* movss ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
>                  gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
>              } else {
> @@ -3331,7 +3462,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x311: /* movsd ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3343,7 +3474,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x013: /* movlps */
>          case 0x113: /* movlpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3353,7 +3484,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x017: /* movhps */
>          case 0x117: /* movhpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(1)));
>              } else {
> @@ -3417,7 +3548,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12a: /* cvtpi2pd */
>              gen_helper_enter_mmx(cpu_env);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,mmx_t0);
>                  gen_ldq_env_A0(s, op2_offset);
>              } else {
> @@ -3440,7 +3571,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x22a: /* cvtsi2ss */
>          case 0x32a: /* cvtsi2sd */
>              ot = mo_64_32(s->dflag);
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>              op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
>              tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
>              if (ot == MO_32) {
> @@ -3462,7 +3593,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12d: /* cvtpd2pi */
>              gen_helper_enter_mmx(cpu_env);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>                  gen_ldo_env_A0(s, op2_offset);
>              } else {
> @@ -3493,7 +3624,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x32d: /* cvtsd2si */
>              ot = mo_64_32(s->dflag);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  if ((b >> 8) & 1) {
>                      gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
>                  } else {
> @@ -3525,7 +3656,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0xc4: /* pinsrw */
>          case 0x1c4:
>              s->rip_offset = 1;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              val = cpu_ldub_code(env, s->pc++);
>              if (b1) {
>                  val &= 7;
> @@ -3559,7 +3690,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x1d6: /* movq ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3626,7 +3757,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,xmm_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      switch (b) {
>                      case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
>                      case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
> @@ -3660,7 +3791,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,mmx_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldq_env_A0(s, op2_offset);
>                  }
>              }
> @@ -3701,7 +3832,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  }
>
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
>                                   cpu_T[0], tcg_const_i32(8 << ot));
>
> @@ -3729,7 +3860,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      ot = MO_64;
>                  }
>
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  if ((b & 1) == 0) {
>                      tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
>                                         s->mem_index, ot | MO_BE);
> @@ -3747,7 +3878,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
>                  gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>                  gen_op_update1_cc();
> @@ -3764,7 +3895,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  {
>                      TCGv bound, zero;
>
> -                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                      /* Extract START, and shift the operand.
>                         Shifts larger than operand size get zeros.  */
>                      tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
> @@ -3801,7 +3932,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
>                  {
>                      TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
> @@ -3828,7 +3959,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  switch (ot) {
>                  default:
>                      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> @@ -3854,7 +3985,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> @@ -3872,7 +4003,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> @@ -3892,7 +4023,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      int end_op;
>
>                      ot = mo_64_32(s->dflag);
> -                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>                      /* Re-use the carry-out from a previous round.  */
>                      TCGV_UNUSED(carry_in);
> @@ -3971,7 +4102,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  if (ot == MO_64) {
>                      tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
>                  } else {
> @@ -4003,7 +4134,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> @@ -4062,7 +4193,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3)
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                  reg = ((modrm >> 3) & 7) | rex_r;
>                  val = cpu_ldub_code(env, s->pc++);
>                  switch (b) {
> @@ -4199,7 +4330,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,xmm_regs[rm |
> REX_B(s)]);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,xmm_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldo_env_A0(s, op2_offset);
>                  }
>              } else {
> @@ -4208,7 +4339,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,mmx_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldq_env_A0(s, op2_offset);
>                  }
>              }
> @@ -4242,7 +4373,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  b = cpu_ldub_code(env, s->pc++);
>                  if (ot == MO_64) {
>                      tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
> @@ -4278,7 +4409,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              if (mod != 3) {
>                  int sz = 4;
>
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>
>                  switch (b) {
> @@ -4326,7 +4457,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          } else {
>              op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,mmx_t0);
>                  gen_ldq_env_A0(s, op2_offset);
>              } else {
> @@ -4404,6 +4535,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      int modrm, reg, rm, mod, op, opreg, val;
>      target_ulong next_eip, tval;
>      int rex_w, rex_r;
> +    int fp_op, fp_ip, fp_cs;
>
>      if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
>          tcg_gen_debug_insn_start(pc_start);
> @@ -4595,7 +4727,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  mod = (modrm >> 6) & 3;
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      opreg = OR_TMP0;
>                  } else if (op == OP_XORL && rm == reg) {
>                  xor_zero:
> @@ -4616,7 +4748,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  reg = ((modrm >> 3) & 7) | rex_r;
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>                  } else if (op == OP_XORL && rm == reg) {
>                      goto xor_zero;
> @@ -4655,7 +4787,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      s->rip_offset = 1;
>                  else
>                      s->rip_offset = insn_const_size(ot);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  opreg = OR_TMP0;
>              } else {
>                  opreg = rm;
> @@ -4698,7 +4830,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (mod != 3) {
>              if (op == 0)
>                  s->rip_offset = insn_const_size(ot);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T[0], rm);
> @@ -4906,7 +5038,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              }
>          }
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (op >= 2 && op != 3 && op != 5)
>                  gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
> @@ -4946,6 +5078,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          do_lcall:
>              if (s->pe && !s->vm86) {
>                  gen_update_cc_op(s);
> +                gen_update_ep(s);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32,
> cpu_T[1],
> @@ -4973,6 +5106,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          do_ljmp:
>              if (s->pe && !s->vm86) {
>                  gen_update_cc_op(s);
> +                gen_update_ep(s);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
> @@ -4998,7 +5132,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_op_mov_v_reg(ot, cpu_T[1], reg);
>          gen_op_testl_T0_T1_cc();
>          set_cc_op(s, CC_OP_LOGICB + ot);
> @@ -5073,7 +5207,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              s->rip_offset = insn_const_size(ot);
>          else if (b == 0x6b)
>              s->rip_offset = 1;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          if (b == 0x69) {
>              val = insn_get(env, s, ot);
>              tcg_gen_movi_tl(cpu_T[1], val);
> @@ -5130,7 +5264,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_op_mov_reg_v(ot, reg, cpu_T[1]);
>              gen_op_mov_reg_v(ot, rm, cpu_T[0]);
>          } else {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_mov_v_reg(ot, cpu_T[0], reg);
>              gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>              tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> @@ -5159,7 +5293,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_mov_v_reg(ot, t0, rm);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_mov_tl(a0, cpu_A0);
>                  gen_op_ld_v(s, ot, t0, a0);
>                  rm = 0; /* avoid warning */
> @@ -5207,16 +5341,16 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  goto illegal_op;
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_update_cc_op(s);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_helper_cmpxchg16b(cpu_env, cpu_A0);
>          } else
> -#endif
> +#endif
>          {
>              if (!(s->cpuid_features & CPUID_CX8))
>                  goto illegal_op;
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_update_cc_op(s);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_helper_cmpxchg8b(cpu_env, cpu_A0);
>          }
>          set_cc_op(s, CC_OP_EFLAGS);
> @@ -5266,7 +5400,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          } else {
>              /* NOTE: order is important too for MMU exceptions */
>              s->popl_esp_hack = 1 << ot;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              s->popl_esp_hack = 0;
>              gen_pop_update(s, ot);
>          }
> @@ -5352,7 +5486,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          /* generate a generic store */
> -        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
>          break;
>      case 0xc6:
>      case 0xc7: /* mov Ev, Iv */
> @@ -5361,7 +5495,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod != 3) {
>              s->rip_offset = insn_const_size(ot);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>          }
>          val = insn_get(env, s, ot);
>          tcg_gen_movi_tl(cpu_T[0], val);
> @@ -5377,7 +5511,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>          break;
>      case 0x8e: /* mov seg, Gv */
> @@ -5385,7 +5519,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          reg = (modrm >> 3) & 7;
>          if (reg >= 6 || reg == R_CS)
>              goto illegal_op;
> -        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>          gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
>          if (reg == R_SS) {
>              /* if reg == SS, inhibit interrupts/trace */
> @@ -5408,7 +5542,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              goto illegal_op;
>          gen_op_movl_T0_seg(reg);
>          ot = mod == 3 ? dflag : MO_16;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>          break;
>
>      case 0x1b6: /* movzbS Gv, Eb */
> @@ -5450,7 +5584,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              }
> @@ -5468,7 +5602,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          s->override = -1;
>          val = s->addseg;
>          s->addseg = 0;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          s->addseg = val;
>          gen_op_mov_reg_v(ot, reg, cpu_A0);
>          break;
> @@ -5558,7 +5692,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_op_mov_reg_v(ot, rm, cpu_T[0]);
>              gen_op_mov_reg_v(ot, reg, cpu_T[1]);
>          } else {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_mov_v_reg(ot, cpu_T[0], reg);
>              /* for xchg, lock is implicit */
>              if (!(prefixes & PREFIX_LOCK))
> @@ -5593,7 +5727,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>          gen_add_A0_im(s, 1 << ot);
>          /* load the segment first to handle exceptions properly */
> @@ -5624,7 +5758,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  if (shift == 2) {
>                      s->rip_offset = 1;
>                  }
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  opreg = OR_TMP0;
>              } else {
>                  opreg = (modrm & 7) | REX_B(s);
> @@ -5674,7 +5808,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          reg = ((modrm >> 3) & 7) | rex_r;
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              opreg = OR_TMP0;
>          } else {
>              opreg = rm;
> @@ -5705,7 +5839,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          op = ((b & 7) << 3) | ((modrm >> 3) & 7);
>          if (mod != 3) {
>              /* memory op */
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              switch(op) {
>              case 0x00 ... 0x07: /* fxxxs */
>              case 0x10 ... 0x17: /* fixxxl */
> @@ -5832,7 +5966,9 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              case 0x0c: /* fldenv mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag -
> 1));
> +                gen_helper_fldenv(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x0d: /* fldcw mem */
>                  tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> @@ -5841,8 +5977,11 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  break;
>              case 0x0e: /* fnstenv mem */
>                  gen_update_cc_op(s);
> +                gen_update_ep(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag -
> 1));
> +                gen_helper_fstenv(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x0f: /* fnstcw mem */
>                  gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
> @@ -5863,12 +6002,17 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>              case 0x2c: /* frstor mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag -
> 1));
> +                gen_helper_frstor(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x2e: /* fnsave mem */
>                  gen_update_cc_op(s);
> +                gen_update_ep(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag -
> 1));
> +                gen_helper_fsave(cpu_env, cpu_A0,
> +                                 tcg_const_i32(dflag == MO_32),
> +                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x2f: /* fnstsw mem */
>                  gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
> @@ -6209,6 +6353,12 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  goto illegal_op;
>              }
>          }
> +        if (instr_is_x87_nc(modrm, b)) {
> +            fp_op = ((b & 0x7) << 8) | (modrm & 0xff);
> +            fp_ip = pc_start - s->cs_base;
> +            fp_cs = env->segs[R_CS].selector;
> +            set_ep(s, fp_op, fp_ip, fp_cs);
> +        }
>          break;
>          /************************/
>          /* string ops */
> @@ -6393,6 +6543,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      do_lret:
>          if (s->pe && !s->vm86) {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
>                                        tcg_const_i32(val));
> @@ -6430,6 +6581,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              }
>          } else {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
>                                        tcg_const_i32(s->pc - s->cs_base));
> @@ -6527,7 +6679,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      case 0x190 ... 0x19f: /* setcc Gv */
>          modrm = cpu_ldub_code(env, s->pc++);
>          gen_setcc1(s, b, cpu_T[0]);
> -        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
> +        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
>          break;
>      case 0x140 ... 0x14f: /* cmov Gv, Ev */
>          if (!(s->cpuid_features & CPUID_CMOV)) {
> @@ -6657,7 +6809,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          if (mod != 3) {
>              s->rip_offset = 1;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T[0], rm);
> @@ -6688,7 +6840,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              /* specific case: we need to add a displacement */
>              gen_exts(ot, cpu_T[1]);
>              tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
> @@ -6764,7 +6916,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          ot = dflag;
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_extu(ot, cpu_T[0]);
>
>          /* Note that lzcnt and tzcnt are in different extensions.  */
> @@ -6884,6 +7036,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
>          } else {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_fwait(cpu_env);
>          }
> @@ -6903,6 +7056,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (CODE64(s))
>              goto illegal_op;
>          gen_update_cc_op(s);
> +        gen_update_ep(s);
>          gen_jmp_im(pc_start - s->cs_base);
>          gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
>          break;
> @@ -6967,7 +7121,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (mod == 3)
>              goto illegal_op;
>          gen_op_mov_v_reg(ot, cpu_T[0], reg);
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          gen_jmp_im(pc_start - s->cs_base);
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>          if (ot == MO_16) {
> @@ -7095,6 +7249,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>          } else {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
>              gen_eob(s);
> @@ -7104,6 +7259,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      case 0x105: /* syscall */
>          /* XXX: is it usable in real mode ? */
>          gen_update_cc_op(s);
> +        gen_update_ep(s);
>          gen_jmp_im(pc_start - s->cs_base);
>          gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
>          gen_eob(s);
> @@ -7113,6 +7269,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>          } else {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
>              /* condition codes are modified only in long mode */
> @@ -7133,6 +7290,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>          } else {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
>              s->is_jmp = DISAS_TB_JUMP;
> @@ -7149,7 +7307,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State,ldt.selector));
>              ot = mod == 3 ? dflag : MO_16;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              break;
>          case 2: /* lldt */
>              if (!s->pe || s->vm86)
> @@ -7158,7 +7316,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_lldt(cpu_env, cpu_tmp2_i32);
> @@ -7170,7 +7328,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State,tr.selector));
>              ot = mod == 3 ? dflag : MO_16;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              break;
>          case 3: /* ltr */
>              if (!s->pe || s->vm86)
> @@ -7179,7 +7337,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_ltr(cpu_env, cpu_tmp2_i32);
> @@ -7189,7 +7347,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          case 5: /* verw */
>              if (!s->pe || s->vm86)
>                  goto illegal_op;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              gen_update_cc_op(s);
>              if (op == 4) {
>                  gen_helper_verr(cpu_env, cpu_T[0]);
> @@ -7212,7 +7370,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              if (mod == 3)
>                  goto illegal_op;
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
> gdt.limit));
>              gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
>              gen_add_A0_im(s, 2);
> @@ -7241,6 +7399,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                          s->cpl != 0)
>                          goto illegal_op;
>                      gen_update_cc_op(s);
> +                    gen_update_ep(s);
>                      gen_jmp_im(pc_start - s->cs_base);
>                      gen_helper_mwait(cpu_env, tcg_const_i32(s->pc -
> pc_start));
>                      gen_eob(s);
> @@ -7268,7 +7427,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>              } else { /* sidt */
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State, idt.limit));
>                  gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
>                  gen_add_A0_im(s, 2);
> @@ -7371,7 +7530,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              } else {
>                  gen_svm_check_intercept(s, pc_start,
>                                          op==2 ? SVM_EXIT_GDTR_WRITE :
> SVM_EXIT_IDTR_WRITE);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
>                  gen_add_A0_im(s, 2);
>                  gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
> @@ -7394,14 +7553,14 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>  #else
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State,cr[0]));
>  #endif
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
>              break;
>          case 6: /* lmsw */
>              if (s->cpl != 0) {
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_helper_lmsw(cpu_env, cpu_T[0]);
>                  gen_jmp_im(s->pc - s->cs_base);
>                  gen_eob(s);
> @@ -7413,8 +7572,9 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>                  } else {
>                      gen_update_cc_op(s);
> +                    gen_update_ep(s);
>                      gen_jmp_im(pc_start - s->cs_base);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_helper_invlpg(cpu_env, cpu_A0);
>                      gen_jmp_im(s->pc - s->cs_base);
>                      gen_eob(s);
> @@ -7446,6 +7606,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
>                          goto illegal_op;
>                      gen_update_cc_op(s);
> +                    gen_update_ep(s);
>                      gen_jmp_im(pc_start - s->cs_base);
>                      if (use_icount)
>                          gen_io_start();
> @@ -7493,7 +7654,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              }
> @@ -7514,7 +7675,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              mod = (modrm >> 6) & 3;
>              rm = modrm & 7;
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, ot, t0, cpu_A0);
>                  a0 = tcg_temp_local_new();
>                  tcg_gen_mov_tl(a0, cpu_A0);
> @@ -7556,7 +7717,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              ot = dflag != MO_16 ? MO_32 : MO_16;
>              modrm = cpu_ldub_code(env, s->pc++);
>              reg = ((modrm >> 3) & 7) | rex_r;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              t0 = tcg_temp_local_new();
>              gen_update_cc_op(s);
>              if (b == 0x102) {
> @@ -7584,7 +7745,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          case 3: /* prefetchnt0 */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              /* nothing more to do */
>              break;
>          default: /* nop (multi byte) */
> @@ -7624,6 +7785,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              case 4:
>              case 8:
>                  gen_update_cc_op(s);
> +                gen_update_ep(s);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  if (b & 2) {
>                      gen_op_mov_v_reg(ot, cpu_T[0], rm);
> @@ -7696,7 +7858,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              goto illegal_op;
>          reg = ((modrm >> 3) & 7) | rex_r;
>          /* generate a generic store */
> -        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
>          break;
>      case 0x1ae:
>          modrm = cpu_ldub_code(env, s->pc++);
> @@ -7704,6 +7866,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          op = (modrm >> 3) & 7;
>          switch(op) {
>          case 0: /* fxsave */
> +            gen_update_ep(s);
>              if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
>                  (s->prefix & PREFIX_LOCK))
>                  goto illegal_op;
> @@ -7711,10 +7874,13 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
>                  break;
>              }
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
> -            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag ==
> MO_64));
> +            gen_helper_fxsave(cpu_env, cpu_A0,
> +                              tcg_const_i32(dflag == MO_32),
> +                              tcg_const_i32(dflag == MO_64));
>              break;
>          case 1: /* fxrstor */
>              if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
> @@ -7724,10 +7890,12 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
>                  break;
>              }
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_update_cc_op(s);
>              gen_jmp_im(pc_start - s->cs_base);
> -            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag ==
> MO_64));
> +            gen_helper_fxrstor(cpu_env, cpu_A0,
> +                               tcg_const_i32(dflag == MO_32),
> +                               tcg_const_i32(dflag == MO_64));
>              break;
>          case 2: /* ldmxcsr */
>          case 3: /* stmxcsr */
> @@ -7738,7 +7906,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
>                  mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (op == 2) {
>                  tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
>                                      s->mem_index, MO_LEUL);
> @@ -7763,7 +7931,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  /* clflush */
>                  if (!(s->cpuid_features & CPUID_CLFLUSH))
>                      goto illegal_op;
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>              }
>              break;
>          default:
> @@ -7775,7 +7943,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          /* ignore for now */
>          break;
>      case 0x1aa: /* rsm */
> @@ -7783,6 +7951,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (!(s->flags & HF_SMM_MASK))
>              goto illegal_op;
>          gen_update_cc_op(s);
> +        gen_update_ep(s);
>          gen_jmp_im(s->pc - s->cs_base);
>          gen_helper_rsm(cpu_env);
>          gen_eob(s);
> @@ -7803,7 +7972,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              ot = mo_64_32(dflag);
>          }
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
>          gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>
> @@ -7880,6 +8049,17 @@ void optimize_flags_init(void)
>      cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State,
> cc_src2),
>                                       "cc_src2");
>
> +    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
> +                                      offsetof(CPUX86State, fpop),
> "fpop");
> +    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
> +                                     "fpip");
> +    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
> +                                     "fpdp");
> +    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State,
> fpds),
> +                                     "fpds");
> +    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State,
> fpcs),
> +                                     "fpcs");
> +
>      for (i = 0; i < CPU_NB_REGS; ++i) {
>          cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
>                                           offsetof(CPUX86State, regs[i]),
> @@ -7924,6 +8104,8 @@ static inline void
> gen_intermediate_code_internal(X86CPU *cpu,
>      dc->singlestep_enabled = cs->singlestep_enabled;
>      dc->cc_op = CC_OP_DYNAMIC;
>      dc->cc_op_dirty = false;
> +    dc->fp_op = FP_EP_INVALID;
> +    dc->fp_ep_dirty = false;
>      dc->cs_base = cs_base;
>      dc->tb = tb;
>      dc->popl_esp_hack = 0;
> @@ -7997,6 +8179,9 @@ static inline void
> gen_intermediate_code_internal(X86CPU *cpu,
>              }
>              tcg_ctx.gen_opc_pc[lj] = pc_ptr;
>              gen_opc_cc_op[lj] = dc->cc_op;
> +            gen_opc_fp_op[lj] = dc->fp_op;
> +            gen_opc_fp_ip[lj] = dc->fp_ip;
> +            gen_opc_fp_cs[lj] = dc->fp_cs;
>              tcg_ctx.gen_opc_instr_start[lj] = 1;
>              tcg_ctx.gen_opc_icount[lj] = num_insns;
>          }
> @@ -8080,6 +8265,7 @@ void gen_intermediate_code_pc(CPUX86State *env,
> TranslationBlock *tb)
>  void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int
> pc_pos)
>  {
>      int cc_op;
> +    uint16_t fp_op;
>  #ifdef DEBUG_DISAS
>      if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
>          int i;
> @@ -8099,4 +8285,10 @@ void restore_state_to_opc(CPUX86State *env,
> TranslationBlock *tb, int pc_pos)
>      cc_op = gen_opc_cc_op[pc_pos];
>      if (cc_op != CC_OP_DYNAMIC)
>          env->cc_op = cc_op;
> +    fp_op = gen_opc_fp_op[pc_pos];
> +    if (fp_op & FP_EP_VALID) {
> +        tcg_gen_movi_i32(cpu_fpop, fp_op);
> +        tcg_gen_movi_tl(cpu_fpip, gen_opc_fp_ip[pc_pos]);
> +        tcg_gen_movi_i32(cpu_fpcs, gen_opc_fp_cs[pc_pos]);
> +    }
>  }
>
> On Sun, Jun 22, 2014 at 9:17 PM, Jaume Martí <jaume.martif@gmail.com> wrote:
> > Thanks Richard for your feedback. I am going to correct the patch and
> > resubmit it.
> >
> > Best regards,
> > Jaume
> >
> > On Sun, Jun 22, 2014 at 8:55 PM, Richard Henderson <rth@twiddle.net> wrote:
> >> On 06/22/2014 07:55 AM, Jaume Martí wrote:
> >>> -        cpu_x86_fsave(env, fpstate_addr, 1);
> >>> -        fpstate->status = fpstate->sw;
> >>> -        magic = 0xffff;
> >>> +    cpu_x86_fsave(env, fpstate_addr);
> >>> +    fpstate->status = fpstate->sw;
> >>> +    magic = 0xffff;
> >>
> >> This patch needs to be split into format fixes and the actual change to be
> >> reviewed.
> >>
> >>> -    /* KVM-only so far */
> >>> -    uint16_t fpop;
> >>> +    union {
> >>> +        uint32_t tcg;
> >>> +        uint16_t kvm;
> >>> +    } fpop;
> >>
> >> This is highly questionable.
> >>
> >>>      .fields = (VMStateField[]) {
> >>> -        VMSTATE_UINT16(env.fpop, X86CPU),
> >>> +        VMSTATE_UINT16(env.fpop.kvm, X86CPU),
> >>
> >> You're breaking save/restore in tcg.  KVM is not required for migration.
> >>
> >>> +        if (non_control_x87_instr(modrm, b)) {
> >>> +            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm &
> 0xff));
> >>> +            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
> >>> +            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
> >>> +        }
> >>
> >> I strongly suspect you can implement this feature without having to add 3
> >> (largely redundant) register writes to every x87 instruction executed.
> >>
> >> See how restore_state_to_opc works to compute the value of CC_OP during
> >> translation.  You can do the same thing to recover these three values.
> >>
> >> You do have to sync these values before normal exits from the TB, but you only
> >> have to do that once, not once for every insn executed.  See gen_update_cc_op.
> >>
> >>
> >> r~
>



-- 
Jaume

[-- Attachment #2: Type: text/html, Size: 229818 bytes --]

      reply	other threads:[~2014-07-21 18:57 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-21  0:16 [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG Jaume Martí
2014-06-22 14:55 ` Jaume Martí
2014-06-22 18:55   ` Richard Henderson
2014-06-22 19:17     ` Jaume Martí
2014-07-19  0:36       ` Jaume Martí
2014-07-21 18:55         ` Jaume Martí [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAL4g94QH0psTw2bEk37L7H25d0bVFStn=f3brQkgRMVBbk4r+g@mail.gmail.com' \
    --to=jaume.martif@gmail.com \
    --cc=afaerber@suse.de \
    --cc=alex.bennee@linaro.org \
    --cc=anthony@codemonkey.ws \
    --cc=gleb@redhat.com \
    --cc=mst@redhat.com \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=quintela@redhat.com \
    --cc=riku.voipio@iki.fi \
    --cc=rth@twiddle.net \
    --cc=vrozenfe@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).