qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.
@ 2014-06-21  0:16 Jaume Martí
  2014-06-22 14:55 ` Jaume Martí
  0 siblings, 1 reply; 6+ messages in thread
From: Jaume Martí @ 2014-06-21  0:16 UTC (permalink / raw)
  To: qemu-devel

Hello,

I submit a patch to fix bugs 661696 and 1248376. The patch implements,
for TCG, the behaviour specified in the Intel and AMD programmer's
manuals for the x87 exception pointers. That is, when executing the
fstenv/fnstenv, fsave and fxsave instructions, the instruction pointer,
data pointer and opcode of the last non-control x87 instruction
executed are correctly saved to the specified memory address. When
executing the fldenv, frstor and fxrstor instructions, the values to be
treated as the instruction pointer, data pointer and opcode of the
last non-control x87 instruction are loaded from the specified memory
address.

Best regards,
Jaume

Signed-off-by: Jaume Marti Farriol (jaume.martif@gmail.com)
diff --git a/include/exec/def-helper.h b/include/exec/def-helper.h
index 73d51f9..9f31404 100644
--- a/include/exec/def-helper.h
+++ b/include/exec/def-helper.h
@@ -8,7 +8,7 @@
    to match the types used by the C helper implementation.

    The target helper.h should be included in all files that use/define
-   helper functions.  THis will ensure that function prototypes are
+   helper functions.  This will ensure that function prototypes are
    consistent.  In addition it should be included an extra two times for
    helper.c, defining:
     GEN_HELPER 1 to produce op generation functions (gen_helper_*)
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 04638e2..0f3b573 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -687,52 +687,52 @@ struct target_xmmreg {
 };

 struct target_fpstate {
- /* Regular FPU environment */
-        abi_ulong       cw;
-        abi_ulong       sw;
-        abi_ulong       tag;
-        abi_ulong       ipoff;
-        abi_ulong       cssel;
-        abi_ulong       dataoff;
-        abi_ulong       datasel;
- struct target_fpreg _st[8];
- uint16_t status;
- uint16_t magic; /* 0xffff = regular FPU data only */
-
- /* FXSR FPU environment */
-        abi_ulong       _fxsr_env[6];   /* FXSR FPU env is ignored */
-        abi_ulong       mxcsr;
-        abi_ulong       reserved;
- struct target_fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
- struct target_xmmreg _xmm[8];
-        abi_ulong       padding[56];
+    /* Regular FPU environment */
+    abi_ulong       cw;
+    abi_ulong       sw;
+    abi_ulong       tag;
+    abi_ulong       ipoff;
+    abi_ulong       cssel;
+    abi_ulong       dataoff;
+    abi_ulong       datasel;
+    struct target_fpreg _st[8];
+    uint16_t        status;
+    uint16_t        magic; /* 0xffff = regular FPU data only */
+
+    /* FXSR FPU environment */
+    abi_ulong       _fxsr_env[6]; /* FXSR FPU env is ignored */
+    abi_ulong       mxcsr;
+    abi_ulong       reserved;
+    struct target_fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
+    struct target_xmmreg _xmm[8];
+    abi_ulong       padding[56];
 };

 #define X86_FXSR_MAGIC 0x0000

 struct target_sigcontext {
- uint16_t gs, __gsh;
- uint16_t fs, __fsh;
- uint16_t es, __esh;
- uint16_t ds, __dsh;
-        abi_ulong edi;
-        abi_ulong esi;
-        abi_ulong ebp;
-        abi_ulong esp;
-        abi_ulong ebx;
-        abi_ulong edx;
-        abi_ulong ecx;
-        abi_ulong eax;
-        abi_ulong trapno;
-        abi_ulong err;
-        abi_ulong eip;
- uint16_t cs, __csh;
-        abi_ulong eflags;
-        abi_ulong esp_at_signal;
- uint16_t ss, __ssh;
-        abi_ulong fpstate; /* pointer */
-        abi_ulong oldmask;
-        abi_ulong cr2;
+    uint16_t gs, __gsh;
+    uint16_t fs, __fsh;
+    uint16_t es, __esh;
+    uint16_t ds, __dsh;
+    abi_ulong edi;
+    abi_ulong esi;
+    abi_ulong ebp;
+    abi_ulong esp;
+    abi_ulong ebx;
+    abi_ulong edx;
+    abi_ulong ecx;
+    abi_ulong eax;
+    abi_ulong trapno;
+    abi_ulong err;
+    abi_ulong eip;
+    uint16_t cs, __csh;
+    abi_ulong eflags;
+    abi_ulong esp_at_signal;
+    uint16_t ss, __ssh;
+    abi_ulong fpstate; /* pointer */
+    abi_ulong oldmask;
+    abi_ulong cr2;
 };

 struct target_ucontext {
@@ -775,7 +775,7 @@ setup_sigcontext(struct target_sigcontext *sc, struct target_fpstate *fpstate,
  CPUX86State *env, abi_ulong mask, abi_ulong fpstate_addr)
 {
  int err = 0;
-        uint16_t magic;
+    uint16_t magic;

  /* already locked in setup_frame() */
  err |= __put_user(env->segs[R_GS].selector, (unsigned int *)&sc->gs);
@@ -798,11 +798,11 @@ setup_sigcontext(struct target_sigcontext *sc, struct target_fpstate *fpstate,
  err |= __put_user(env->regs[R_ESP], &sc->esp_at_signal);
  err |= __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);

-        cpu_x86_fsave(env, fpstate_addr, 1);
-        fpstate->status = fpstate->sw;
-        magic = 0xffff;
-        err |= __put_user(magic, &fpstate->magic);
-        err |= __put_user(fpstate_addr, &sc->fpstate);
+    cpu_x86_fsave(env, fpstate_addr);
+    fpstate->status = fpstate->sw;
+    magic = 0xffff;
+    err |= __put_user(magic, &fpstate->magic);
+    err |= __put_user(fpstate_addr, &sc->fpstate);

  /* non-iBCS2 extensions.. */
  err |= __put_user(mask, &sc->oldmask);
@@ -889,10 +889,10 @@ static void setup_frame(int sig, struct target_sigaction *ka,
  env->regs[R_ESP] = frame_addr;
  env->eip = ka->_sa_handler;

-        cpu_x86_load_seg(env, R_DS, __USER_DS);
-        cpu_x86_load_seg(env, R_ES, __USER_DS);
-        cpu_x86_load_seg(env, R_SS, __USER_DS);
-        cpu_x86_load_seg(env, R_CS, __USER_CS);
+    cpu_x86_load_seg(env, R_DS, __USER_DS);
+    cpu_x86_load_seg(env, R_ES, __USER_DS);
+    cpu_x86_load_seg(env, R_SS, __USER_DS);
+    cpu_x86_load_seg(env, R_CS, __USER_CS);
  env->eflags &= ~TF_MASK;

  unlock_user_struct(frame, frame_addr, 1);
@@ -969,10 +969,10 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
  env->regs[R_ESP] = frame_addr;
  env->eip = ka->_sa_handler;

-        cpu_x86_load_seg(env, R_DS, __USER_DS);
-        cpu_x86_load_seg(env, R_ES, __USER_DS);
-        cpu_x86_load_seg(env, R_SS, __USER_DS);
-        cpu_x86_load_seg(env, R_CS, __USER_CS);
+    cpu_x86_load_seg(env, R_DS, __USER_DS);
+    cpu_x86_load_seg(env, R_ES, __USER_DS);
+    cpu_x86_load_seg(env, R_SS, __USER_DS);
+    cpu_x86_load_seg(env, R_CS, __USER_CS);
  env->eflags &= ~TF_MASK;

  unlock_user_struct(frame, frame_addr, 1);
@@ -989,43 +989,43 @@ give_sigsegv:
 static int
 restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc, int *peax)
 {
- unsigned int err = 0;
-        abi_ulong fpstate_addr;
-        unsigned int tmpflags;
-
-        cpu_x86_load_seg(env, R_GS, tswap16(sc->gs));
-        cpu_x86_load_seg(env, R_FS, tswap16(sc->fs));
-        cpu_x86_load_seg(env, R_ES, tswap16(sc->es));
-        cpu_x86_load_seg(env, R_DS, tswap16(sc->ds));
-
-        env->regs[R_EDI] = tswapl(sc->edi);
-        env->regs[R_ESI] = tswapl(sc->esi);
-        env->regs[R_EBP] = tswapl(sc->ebp);
-        env->regs[R_ESP] = tswapl(sc->esp);
-        env->regs[R_EBX] = tswapl(sc->ebx);
-        env->regs[R_EDX] = tswapl(sc->edx);
-        env->regs[R_ECX] = tswapl(sc->ecx);
-        env->eip = tswapl(sc->eip);
-
-        cpu_x86_load_seg(env, R_CS, lduw_p(&sc->cs) | 3);
-        cpu_x86_load_seg(env, R_SS, lduw_p(&sc->ss) | 3);
-
-        tmpflags = tswapl(sc->eflags);
-        env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
-        // regs->orig_eax = -1; /* disable syscall checks */
-
-        fpstate_addr = tswapl(sc->fpstate);
- if (fpstate_addr != 0) {
-                if (!access_ok(VERIFY_READ, fpstate_addr,
-                               sizeof(struct target_fpstate)))
-                        goto badframe;
-                cpu_x86_frstor(env, fpstate_addr, 1);
- }
+    unsigned int err = 0;
+    abi_ulong fpstate_addr;
+    unsigned int tmpflags;
+
+    cpu_x86_load_seg(env, R_GS, tswap16(sc->gs));
+    cpu_x86_load_seg(env, R_FS, tswap16(sc->fs));
+    cpu_x86_load_seg(env, R_ES, tswap16(sc->es));
+    cpu_x86_load_seg(env, R_DS, tswap16(sc->ds));
+
+    env->regs[R_EDI] = tswapl(sc->edi);
+    env->regs[R_ESI] = tswapl(sc->esi);
+    env->regs[R_EBP] = tswapl(sc->ebp);
+    env->regs[R_ESP] = tswapl(sc->esp);
+    env->regs[R_EBX] = tswapl(sc->ebx);
+    env->regs[R_EDX] = tswapl(sc->edx);
+    env->regs[R_ECX] = tswapl(sc->ecx);
+    env->eip = tswapl(sc->eip);
+
+    cpu_x86_load_seg(env, R_CS, lduw_p(&sc->cs) | 3);
+    cpu_x86_load_seg(env, R_SS, lduw_p(&sc->ss) | 3);
+
+    tmpflags = tswapl(sc->eflags);
+    env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
+
+    fpstate_addr = tswapl(sc->fpstate);
+    if (fpstate_addr != 0) {
+        if (!access_ok(VERIFY_READ, fpstate_addr,
+                    sizeof(struct target_fpstate))) {
+            goto badframe;
+        }
+        cpu_x86_frstor(env, fpstate_addr);
+    }

-        *peax = tswapl(sc->eax);
- return err;
+    *peax = tswapl(sc->eax);
+    return err;
 badframe:
- return 1;
+    return 1;
 }

 long do_sigreturn(CPUX86State *env)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 0014acc..b239cae 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -803,10 +803,14 @@ typedef struct CPUX86State {
     uint16_t fpuc;
     uint8_t fptags[8];   /* 0 = valid, 1 = empty */
     FPReg fpregs[8];
-    /* KVM-only so far */
-    uint16_t fpop;
+    union {
+        uint32_t tcg;
+        uint16_t kvm;
+    } fpop;
     uint64_t fpip;
     uint64_t fpdp;
+    uint32_t fpcs;
+    uint32_t fpds;

     /* emulator internal variables */
     float_status fp_status;
@@ -1049,8 +1053,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
 /* the following helpers are only usable in user mode simulation as
    they can trigger unexpected exceptions */
 void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
-void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
-void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);

 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index de7ba76..c80cce7 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -59,6 +59,8 @@
 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)

+#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
+
 static inline void fpush(CPUX86State *env)
 {
     env->fpstt = (env->fpstt - 1) & 7;
@@ -607,6 +609,10 @@ void helper_fninit(CPUX86State *env)
     env->fptags[5] = 1;
     env->fptags[6] = 1;
     env->fptags[7] = 1;
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

 /* BCD ops */
@@ -964,13 +970,13 @@ void helper_fxam_ST0(CPUX86State *env)
     }
 }

-void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int fpus, fptag, exp, i;
+    int fptag, exp, i;
     uint64_t mant;
     CPU_LDoubleU tmp;

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 7; i >= 0; i--) {
         fptag <<= 2;
@@ -990,83 +996,150 @@ void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
             }
         }
     }
+
     if (data32) {
         /* 32 bit */
-        cpu_stl_data(env, ptr, env->fpuc);
-        cpu_stl_data(env, ptr + 4, fpus);
-        cpu_stl_data(env, ptr + 8, fptag);
-        cpu_stl_data(env, ptr + 12, 0); /* fpip */
-        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
-        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
-        cpu_stl_data(env, ptr + 24, 0); /* fpos */
+        cpu_stw_data(env, ptr, env->fpuc);
+        cpu_stw_data(env, ptr + 4, FPUS(env));
+        cpu_stw_data(env, ptr + 8, fptag);
+        if (protected_mode) {
+            cpu_stl_data(env, ptr + 12, env->fpip);
+            cpu_stl_data(env, ptr + 16,
+                        ((env->fpop.tcg & 0x7ff) << 16) | (env->fpcs & 0xffff));
+            cpu_stl_data(env, ptr + 20, env->fpdp);
+            cpu_stl_data(env, ptr + 24, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
+            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) << 12) |
+                        (env->fpop.tcg & 0x7ff))); /* fpip[31..16], fpop */
+            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
+            cpu_stl_data(env, ptr + 24,
+                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
+        }
     } else {
         /* 16 bit */
         cpu_stw_data(env, ptr, env->fpuc);
-        cpu_stw_data(env, ptr + 2, fpus);
+        cpu_stw_data(env, ptr + 2, FPUS(env));
         cpu_stw_data(env, ptr + 4, fptag);
-        cpu_stw_data(env, ptr + 6, 0);
-        cpu_stw_data(env, ptr + 8, 0);
-        cpu_stw_data(env, ptr + 10, 0);
-        cpu_stw_data(env, ptr + 12, 0);
+        if (protected_mode) {
+            cpu_stw_data(env, ptr + 6, env->fpip);
+            cpu_stw_data(env, ptr + 8, env->fpcs);
+            cpu_stw_data(env, ptr + 10, env->fpdp);
+            cpu_stw_data(env, ptr + 12, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
+            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
+                        (env->fpop.tcg & 0x7ff)); /* fpip[19..16], fpop */
+            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
+            cpu_stw_data(env, ptr + 12,
+                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
+        }
     }
+
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

-void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int i, fpus, fptag;
+    int tmp, i, fpus, fptag;

     if (data32) {
+        /* 32 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 4);
         fptag = cpu_lduw_data(env, ptr + 8);
+        if (protected_mode) {
+            env->fpip = cpu_ldl_data(env, ptr + 12);
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpcs = tmp & 0xffff;
+            env->fpop.tcg = tmp >> 16;
+            env->fpdp = cpu_ldl_data(env, ptr + 20);
+            env->fpds = cpu_lduw_data(env, ptr + 24);
+        } else {
+            /* Real mode */
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpip = ((tmp & 0xffff000) << 4) |
+                        cpu_lduw_data(env, ptr + 12);
+            env->fpop.tcg = tmp & 0x7ff;
+            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
+                        cpu_lduw_data(env, ptr + 20);
+        }
     } else {
+        /* 16 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 2);
         fptag = cpu_lduw_data(env, ptr + 4);
+        if (protected_mode) {
+            /* Protected mode  */
+            env->fpip = cpu_lduw_data(env, ptr + 6);
+            env->fpcs = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 10);
+            env->fpds = cpu_lduw_data(env, ptr + 12);
+        } else {
+            /* Real mode  */
+            tmp = cpu_lduw_data(env, ptr + 8);
+            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr + 6);
+            env->fpop.tcg = tmp & 0x7ff;
+            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
+                        cpu_lduw_data(env, ptr + 10);
+        }
     }
+
     env->fpstt = (fpus >> 11) & 7;
     env->fpus = fpus & ~0x3800;
     for (i = 0; i < 8; i++) {
         env->fptags[i] = ((fptag & 3) == 3);
         fptag >>= 2;
     }
+
+    env->fpip &= 0xffffffff;
+    env->fpdp &= 0xffffffff;
+    if (!protected_mode) {
+        env->fpcs = 0;
+        env->fpds = 0;
+    }
 }

-void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
+                  int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fstenv(env, ptr, data32);
+    helper_fstenv(env, ptr, data32, protected_mode);

-    ptr += (14 << data32);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }
     for (i = 0; i < 8; i++) {
         tmp = ST(i);
         helper_fstt(env, tmp, ptr);
         ptr += 10;
     }

-    /* fninit */
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
+    helper_fninit(env);
 }

-void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fldenv(env, ptr, data32);
-    ptr += (14 << data32);
+    helper_fldenv(env, ptr, data32, protected_mode);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }

     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, ptr);
@@ -1075,21 +1148,22 @@ void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
     }
 }

-#if defined(CONFIG_USER_ONLY)
-void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
+#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS == 32
+
+void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
 {
-    helper_fsave(env, ptr, data32);
+    helper_fsave(env, ptr, 1, 1);
 }

-void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
 {
-    helper_frstor(env, ptr, data32);
+    helper_frstor(env, ptr, 1, 1);
 }
 #endif

-void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
-    int fpus, fptag, i, nb_xmm_regs;
+    int i, nb_xmm_regs, fptag;
     floatx80 tmp;
     target_ulong addr;

@@ -1098,25 +1172,36 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
         raise_exception(env, EXCP0D_GPF);
     }

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 0; i < 8; i++) {
         fptag |= (env->fptags[i] << i);
     }
+    fptag ^= 0xff;
+
     cpu_stw_data(env, ptr, env->fpuc);
-    cpu_stw_data(env, ptr + 2, fpus);
-    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
+    cpu_stw_data(env, ptr + 2, FPUS(env));
+    cpu_stw_data(env, ptr + 4, fptag & 0xff);
+    cpu_stw_data(env, ptr + 6, env->fpop.tcg);
+
 #ifdef TARGET_X86_64
     if (data64) {
-        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
-        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
+        /* 64 bit */
+        cpu_stq_data(env, ptr + 8, env->fpip);
+        cpu_stq_data(env, ptr + 16, env->fpdp);
     } else
 #endif
     {
-        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
-        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
-        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
-        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
+        if (data32) {
+            /* 32 bit */
+            cpu_stl_data(env, ptr + 8, env->fpip);
+            cpu_stl_data(env, ptr + 16, env->fpdp);
+        } else {
+            /* 16 bit */
+            cpu_stw_data(env, ptr + 8, env->fpip);
+            cpu_stw_data(env, ptr + 16, env->fpdp);
+        }
+        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
+        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
     }

     addr = ptr + 0x20;
@@ -1149,7 +1234,7 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
     }
 }

-void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
     int i, fpus, fptag, nb_xmm_regs;
     floatx80 tmp;
@@ -1170,6 +1255,30 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
         env->fptags[i] = ((fptag >> i) & 1);
     }

+    env->fpop.tcg = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
+
+#ifdef TARGET_X86_64
+    if (data64) {
+        /* 64 bit */
+        env->fpip = cpu_ldq_data(env, ptr + 8);
+        env->fpdp = cpu_ldq_data(env, ptr + 16);
+    } else
+#endif
+    {
+        if (data32) {
+            /* 32 bit */
+            env->fpip = cpu_ldl_data(env, ptr + 8);
+            env->fpdp = cpu_ldl_data(env, ptr + 16);
+        } else {
+            /* 16 bit */
+            env->fpip = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 16);
+        }
+
+        env->fpcs = cpu_lduw_data(env, ptr + 12);
+        env->fpds = cpu_lduw_data(env, ptr + 20);
+    }
+
     addr = ptr + 0x20;
     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, addr);
@@ -1198,6 +1307,11 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
             }
         }
     }
+
+    if (!data64) {
+        env->fpip &= 0xffffffff;
+        env->fpdp &= 0xffffffff;
+    }
 }

 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 3775abe..626b296 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -185,12 +185,12 @@ DEF_HELPER_1(frndint, void, env)
 DEF_HELPER_1(fscale, void, env)
 DEF_HELPER_1(fsin, void, env)
 DEF_HELPER_1(fcos, void, env)
-DEF_HELPER_3(fstenv, void, env, tl, int)
-DEF_HELPER_3(fldenv, void, env, tl, int)
-DEF_HELPER_3(fsave, void, env, tl, int)
-DEF_HELPER_3(frstor, void, env, tl, int)
-DEF_HELPER_3(fxsave, void, env, tl, int)
-DEF_HELPER_3(fxrstor, void, env, tl, int)
+DEF_HELPER_4(fstenv, void, env, tl, int, int)
+DEF_HELPER_4(fldenv, void, env, tl, int, int)
+DEF_HELPER_4(fsave, void, env, tl, int, int)
+DEF_HELPER_4(frstor, void, env, tl, int, int)
+DEF_HELPER_4(fxsave, void, env, tl, int, int)
+DEF_HELPER_4(fxrstor, void, env, tl, int, int)

 DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index e555040..8444779 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -975,7 +975,7 @@ static int kvm_put_fpu(X86CPU *cpu)
     fpu.fsw = env->fpus & ~(7 << 11);
     fpu.fsw |= (env->fpstt & 7) << 11;
     fpu.fcw = env->fpuc;
-    fpu.last_opcode = env->fpop;
+    fpu.last_opcode = env->fpop.kvm;
     fpu.last_ip = env->fpip;
     fpu.last_dp = env->fpdp;
     for (i = 0; i < 8; ++i) {
@@ -1020,7 +1020,7 @@ static int kvm_put_xsave(X86CPU *cpu)
         twd |= (!env->fptags[i]) << i;
     }
     xsave->region[XSAVE_FCW_FSW] = (uint32_t)(swd << 16) + cwd;
-    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
+    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop.kvm << 16) + twd;
     memcpy(&xsave->region[XSAVE_CWD_RIP], &env->fpip, sizeof(env->fpip));
     memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
     memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
@@ -1286,7 +1286,7 @@ static int kvm_get_fpu(X86CPU *cpu)
     env->fpstt = (fpu.fsw >> 11) & 7;
     env->fpus = fpu.fsw;
     env->fpuc = fpu.fcw;
-    env->fpop = fpu.last_opcode;
+    env->fpop.kvm = fpu.last_opcode;
     env->fpip = fpu.last_ip;
     env->fpdp = fpu.last_dp;
     for (i = 0; i < 8; ++i) {
@@ -1318,7 +1318,7 @@ static int kvm_get_xsave(X86CPU *cpu)
     cwd = (uint16_t)xsave->region[XSAVE_FCW_FSW];
     swd = (uint16_t)(xsave->region[XSAVE_FCW_FSW] >> 16);
     twd = (uint16_t)xsave->region[XSAVE_FTW_FOP];
-    env->fpop = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
+    env->fpop.kvm = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
     env->fpstt = (swd >> 11) & 7;
     env->fpus = swd;
     env->fpuc = cwd;
diff --git a/target-i386/machine.c b/target-i386/machine.c
index d548c05..a879e00 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -388,7 +388,7 @@ static bool fpop_ip_dp_needed(void *opaque)
     X86CPU *cpu = opaque;
     CPUX86State *env = &cpu->env;

-    return env->fpop != 0 || env->fpip != 0 || env->fpdp != 0;
+    return env->fpop.kvm != 0 || env->fpip != 0 || env->fpdp != 0;
 }

 static const VMStateDescription vmstate_fpop_ip_dp = {
@@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
     .fields      = (VMStateField []) {
-        VMSTATE_UINT16(env.fpop, X86CPU),
+        VMSTATE_UINT16(env.fpop.kvm, X86CPU),
         VMSTATE_UINT64(env.fpip, X86CPU),
         VMSTATE_UINT64(env.fpdp, X86CPU),
         VMSTATE_END_OF_LIST()
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 707ebd5..8d29931 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -58,6 +58,7 @@
 #endif

 //#define MACRO_TEST   1
+#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)

 /* global register indexes */
 static TCGv_ptr cpu_env;
@@ -65,6 +66,11 @@ static TCGv cpu_A0;
 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
+static TCGv_i32 cpu_fpop;
+static TCGv cpu_fpip;
+static TCGv cpu_fpdp;
+static TCGv_i32 cpu_fpds;
+static TCGv_i32 cpu_fpcs;
 /* local temps */
 static TCGv cpu_T[2];
 /* local register indexes (only used inside old micro ops) */
@@ -208,6 +214,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_CLR] = 0,
 };

+static inline bool non_control_x87_instr(int modrm, int b)
+{
+    int op, mod, rm;
+    switch (b) {
+    case 0xd8 ... 0xdf:
+        /* floats */
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        if (mod != 3) {
+            /* memory */
+            switch (op) {
+            case 0x0c: /* fldenv */
+            case 0x0d: /* fldcw */
+            case 0x0e: /* fstenv, fnstenv */
+            case 0x0f: /* fstcw, fnstcw */
+            case 0x2c: /* frstor */
+            case 0x2e: /* fsave, fnsave */
+            case 0x2f: /* fstsw, fnstsw */
+                return false;
+            default:
+                return true;
+            }
+        } else {
+            /* register */
+            switch (op) {
+            case 0x0a:
+                return false; /* fnop, Illegal op */
+            case 0x0e: /* fdecstp, fincstp */
+            case 0x28: /* ffree */
+                return false;
+            case 0x1c:
+                switch (rm) {
+                case 1: /* feni */
+                    return true;
+                case 2: /* fclex, fnclex */
+                case 3: /* finit, fninit */
+                    return false;
+                case 4: /* fsetpm */
+                    return true;
+                default: /* Illegal op */
+                    return false;
+                }
+            case 0x3c:
+                return false; /* fstsw, fnstsw, Illegal op */
+            default:
+                return true;
+            }
+        }
+    /*case 0x9b: // fwait, wait
+        return false;*/
+    default:
+        return false;
+    }
+}
+
 static void set_cc_op(DisasContext *s, CCOp op)
 {
     int dead;
@@ -1588,14 +1650,14 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
-    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
+    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
     tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                         cpu_tmp2_i32, cpu_tmp3_i32);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);

-    /* The CC_OP value is no longer predictable.  */
+    /* The CC_OP value is no longer predictable.  */
     set_cc_op(s, CC_OP_DYNAMIC);
 }

@@ -1871,7 +1933,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
     }
 }

-static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm, int b)
 {
     target_long disp;
     int havesib;
@@ -1879,6 +1941,7 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
     int index;
     int scale;
     int mod, rm, code, override, must_add_seg;
+    int non_control_float_instr;
     TCGv sum;

     override = s->override;
@@ -1958,6 +2021,13 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
             tcg_gen_addi_tl(cpu_A0, sum, disp);
         }

+        non_control_float_instr = non_control_x87_instr(modrm, b);
+        if (non_control_float_instr) {
+            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+            if (s->aflag == MO_32) {
+                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
+            }
+        }
         if (must_add_seg) {
             if (override < 0) {
                 if (base == R_EBP || base == R_ESP) {
@@ -1969,6 +2039,12 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)

             tcg_gen_ld_tl(cpu_tmp0, cpu_env,
                           offsetof(CPUX86State, segs[override].base));
+
+            if (non_control_float_instr) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
+
             if (CODE64(s)) {
                 if (s->aflag == MO_32) {
                     tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
@@ -1978,6 +2054,11 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
             }

             tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+        } else {
+            if (non_control_float_instr) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }

         if (s->aflag == MO_32) {
@@ -2047,8 +2128,22 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
                     override = R_DS;
                 }
             }
+            if (non_control_x87_instr(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
             gen_op_addl_A0_seg(s, override);
+        } else {
+            if (non_control_x87_instr(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }
+#ifdef TARGET_X86_64
+        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
+#endif
         break;

     default:
@@ -2138,7 +2233,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
    OR_TMP0 */
 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
-                           TCGMemOp ot, int reg, int is_store)
+                           TCGMemOp ot, int reg, int is_store, int b)
 {
     int mod, rm;

@@ -2155,7 +2250,7 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         }
     } else {
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_v_reg(ot, cpu_T[0], reg);
@@ -2258,7 +2353,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 {
     CCPrepare cc;

-    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

     cc = gen_prepare_cc(s, b, cpu_T[1]);
     if (cc.mask != -1) {
@@ -2284,17 +2379,17 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,

 static inline void gen_op_movl_T0_seg(int seg_reg)
 {
-    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                      offsetof(CPUX86State,segs[seg_reg].selector));
 }

 static inline void gen_op_movl_seg_T0_vm(int seg_reg)
 {
     tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
-    tcg_gen_st32_tl(cpu_T[0], cpu_env,
+    tcg_gen_st32_tl(cpu_T[0], cpu_env,
                     offsetof(CPUX86State,segs[seg_reg].selector));
     tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
-    tcg_gen_st_tl(cpu_T[0], cpu_env,
+    tcg_gen_st_tl(cpu_T[0], cpu_env,
                   offsetof(CPUX86State,segs[seg_reg].base));
 }

@@ -3051,7 +3146,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x0e7: /* movntq */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             break;
         case 0x1e7: /* movntdq */
@@ -3059,20 +3154,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12b: /* movntps */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x22b: /* movntss */
         case 0x32b: /* movntsd */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (b1 & 1) {
                 gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
@@ -3084,13 +3179,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
                 tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
@@ -3099,15 +3194,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
@@ -3115,7 +3210,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3132,7 +3227,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16f: /* movdqa xmm, ea */
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3142,7 +3237,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3157,7 +3252,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3172,7 +3267,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x012: /* movlps */
         case 0x112: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3184,7 +3279,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x212: /* movsldup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3200,7 +3295,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3214,7 +3309,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3226,7 +3321,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x216: /* movshdup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3264,34 +3359,34 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3303,7 +3398,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3318,7 +3413,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x17f: /* movdqa ea, xmm */
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3328,7 +3423,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
             } else {
@@ -3339,7 +3434,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3351,7 +3446,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x013: /* movlps */
         case 0x113: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3361,7 +3456,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x017: /* movhps */
         case 0x117: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3409,14 +3504,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x050: /* movmskps */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
             gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
             break;
         case 0x150: /* movmskpd */
             rm = (modrm & 7) | REX_B(s);
-            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                              offsetof(CPUX86State,xmm_regs[rm]));
             gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
             tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
@@ -3425,7 +3520,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12a: /* cvtpi2pd */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -3448,7 +3543,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x22a: /* cvtsi2ss */
         case 0x32a: /* cvtsi2sd */
             ot = mo_64_32(s->dflag);
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
@@ -3470,7 +3565,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12d: /* cvtpd2pi */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 gen_ldo_env_A0(s, op2_offset);
             } else {
@@ -3501,7 +3596,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x32d: /* cvtsd2si */
             ot = mo_64_32(s->dflag);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
                 } else {
@@ -3533,7 +3628,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0xc4: /* pinsrw */
         case 0x1c4:
             s->rip_offset = 1;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
@@ -3567,7 +3662,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3634,7 +3729,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     switch (b) {
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
@@ -3668,7 +3763,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -3709,7 +3804,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }

                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                                  cpu_T[0], tcg_const_i32(8 << ot));

@@ -3737,7 +3832,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     ot = MO_64;
                 }

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b & 1) == 0) {
                     tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
                                        s->mem_index, ot | MO_BE);
@@ -3755,7 +3850,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 gen_op_update1_cc();
@@ -3772,7 +3867,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 {
                     TCGv bound, zero;

-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
                     tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
@@ -3809,7 +3904,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
@@ -3836,7 +3931,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 switch (ot) {
                 default:
                     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3862,7 +3957,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3880,7 +3975,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3900,7 +3995,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     int end_op;

                     ot = mo_64_32(s->dflag);
-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                     /* Re-use the carry-out from a previous round.  */
                     TCGV_UNUSED(carry_in);
@@ -3979,7 +4074,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 if (ot == MO_64) {
                     tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
                 } else {
@@ -4011,7 +4106,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
@@ -4070,7 +4165,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3)
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
@@ -4207,7 +4302,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldo_env_A0(s, op2_offset);
                 }
             } else {
@@ -4216,7 +4311,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -4250,7 +4345,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 b = cpu_ldub_code(env, s->pc++);
                 if (ot == MO_64) {
                     tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
@@ -4286,7 +4381,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 int sz = 4;

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);

                 switch (b) {
@@ -4334,7 +4429,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         } else {
             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -4603,7 +4698,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     opreg = OR_TMP0;
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
@@ -4624,7 +4719,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
@@ -4663,7 +4758,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     s->rip_offset = 1;
                 else
                     s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = rm;
@@ -4706,7 +4801,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (mod != 3) {
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -4914,7 +5009,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             }
         }
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
@@ -5006,7 +5101,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_v_reg(ot, cpu_T[1], reg);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
@@ -5081,7 +5176,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             s->rip_offset = insn_const_size(ot);
         else if (b == 0x6b)
             s->rip_offset = 1;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
             tcg_gen_movi_tl(cpu_T[1], val);
@@ -5138,7 +5233,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
             tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -5167,7 +5262,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_mov_tl(a0, cpu_A0);
                 gen_op_ld_v(s, ot, t0, a0);
                 rm = 0; /* avoid warning */
@@ -5215,16 +5310,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
-#endif
+#endif
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5274,7 +5369,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             s->popl_esp_hack = 0;
             gen_pop_update(s, ot);
         }
@@ -5360,7 +5455,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         reg = ((modrm >> 3) & 7) | rex_r;

         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
@@ -5369,7 +5464,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
         }
         val = insn_get(env, s, ot);
         tcg_gen_movi_tl(cpu_T[0], val);
@@ -5385,7 +5480,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         break;
     case 0x8e: /* mov seg, Gv */
@@ -5393,7 +5488,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
-        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace */
@@ -5416,7 +5511,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             goto illegal_op;
         gen_op_movl_T0_seg(reg);
         ot = mod == 3 ? dflag : MO_16;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
         break;

     case 0x1b6: /* movzbS Gv, Eb */
@@ -5458,7 +5553,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -5476,7 +5571,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         s->override = -1;
         val = s->addseg;
         s->addseg = 0;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         s->addseg = val;
         gen_op_mov_reg_v(ot, reg, cpu_A0);
         break;
@@ -5566,7 +5661,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
@@ -5601,7 +5696,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
@@ -5632,7 +5727,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 if (shift == 2) {
                     s->rip_offset = 1;
                 }
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = (modrm & 7) | REX_B(s);
@@ -5682,7 +5777,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             opreg = OR_TMP0;
         } else {
             opreg = rm;
@@ -5713,7 +5808,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -5840,7 +5935,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 0x0c: /* fldenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fldenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0d: /* fldcw mem */
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
@@ -5850,7 +5947,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 0x0e: /* fnstenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fstenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
@@ -5871,12 +5970,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 0x2c: /* frstor mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_frstor(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2e: /* fnsave mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fsave(cpu_env, cpu_A0,
+                                 tcg_const_i32(dflag == MO_32),
+                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
@@ -6217,6 +6320,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 goto illegal_op;
             }
         }
+        if (non_control_x87_instr(modrm, b)) {
+            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm & 0xff));
+            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
+            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
+        }
         break;
         /************************/
         /* string ops */
@@ -6276,7 +6384,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x6d:
         ot = mo_b_d32(b, dflag);
         tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
-        gen_check_io(s, ot, pc_start - s->cs_base,
+        gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
         if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
@@ -6535,7 +6643,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = cpu_ldub_code(env, s->pc++);
         gen_setcc1(s, b, cpu_T[0]);
-        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         if (!(s->cpuid_features & CPUID_CMOV)) {
@@ -6665,7 +6773,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
             s->rip_offset = 1;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -6696,7 +6804,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* specific case: we need to add a displacement */
             gen_exts(ot, cpu_T[1]);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
@@ -6750,7 +6858,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_extu(ot, cpu_T[0]);

         /* Note that lzcnt and tzcnt are in different extensions.  */
@@ -6953,7 +7061,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (mod == 3)
             goto illegal_op;
         gen_op_mov_v_reg(ot, cpu_T[0], reg);
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         if (ot == MO_16) {
@@ -7135,7 +7243,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 2: /* lldt */
             if (!s->pe || s->vm86)
@@ -7144,7 +7252,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
@@ -7156,7 +7264,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 3: /* ltr */
             if (!s->pe || s->vm86)
@@ -7165,7 +7273,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
@@ -7175,7 +7283,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 5: /* verw */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             gen_update_cc_op(s);
             if (op == 4) {
                 gen_helper_verr(cpu_env, cpu_T[0]);
@@ -7198,7 +7306,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if (mod == 3)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
             gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
             gen_add_A0_im(s, 2);
@@ -7254,7 +7362,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
             } else { /* sidt */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
                 gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
                 gen_add_A0_im(s, 2);
@@ -7311,7 +7419,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     break;
                 case 4: /* STGI */
                     if ((!(s->flags & HF_SVME_MASK) &&
-                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
+                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
                         !s->pe)
                         goto illegal_op;
                     if (s->cpl != 0) {
@@ -7332,8 +7440,8 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     }
                     break;
                 case 6: /* SKINIT */
-                    if ((!(s->flags & HF_SVME_MASK) &&
-                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
+                    if ((!(s->flags & HF_SVME_MASK) &&
+                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
                         !s->pe)
                         goto illegal_op;
                     gen_helper_skinit(cpu_env);
@@ -7357,7 +7465,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             } else {
                 gen_svm_check_intercept(s, pc_start,
                                         op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
                 gen_add_A0_im(s, 2);
                 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
@@ -7380,14 +7488,14 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
 #else
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
 #endif
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
             break;
         case 6: /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_helper_lmsw(cpu_env, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
@@ -7400,7 +7508,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 } else {
                     gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_helper_invlpg(cpu_env, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
@@ -7479,7 +7587,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -7500,7 +7608,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, ot, t0, cpu_A0);
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, cpu_A0);
@@ -7542,7 +7650,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             ot = dflag != MO_16 ? MO_32 : MO_16;
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
@@ -7570,7 +7678,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
@@ -7682,7 +7790,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             goto illegal_op;
         reg = ((modrm >> 3) & 7) | rex_r;
         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0x1ae:
         modrm = cpu_ldub_code(env, s->pc++);
@@ -7697,10 +7805,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxsave(cpu_env, cpu_A0,
+                              tcg_const_i32(dflag == MO_32),
+                              tcg_const_i32(dflag == MO_64));
             break;
         case 1: /* fxrstor */
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
@@ -7710,10 +7820,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxrstor(cpu_env, cpu_A0,
+                               tcg_const_i32(dflag == MO_32),
+                               tcg_const_i32(dflag == MO_64));
             break;
         case 2: /* ldmxcsr */
         case 3: /* stmxcsr */
@@ -7724,7 +7836,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                 mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op == 2) {
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                     s->mem_index, MO_LEUL);
@@ -7749,7 +7861,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 /* clflush */
                 if (!(s->cpuid_features & CPUID_CLFLUSH))
                     goto illegal_op;
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
             }
             break;
         default:
@@ -7761,7 +7873,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
@@ -7789,7 +7901,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             ot = mo_64_32(dflag);
         }

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);

@@ -7866,6 +7978,17 @@ void optimize_flags_init(void)
     cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
                                      "cc_src2");

+    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
+                                      offsetof(CPUX86State, fpop.tcg), "fpop");
+    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
+                                     "fpip");
+    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
+                                     "fpdp");
+    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpds),
+                                     "fpds");
+    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpcs),
+                                     "fpcs");
+
     for (i = 0; i < CPU_NB_REGS; ++i) {
         cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
                                          offsetof(CPUX86State, regs[i]),

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.
  2014-06-21  0:16 [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG Jaume Martí
@ 2014-06-22 14:55 ` Jaume Martí
  2014-06-22 18:55   ` Richard Henderson
  0 siblings, 1 reply; 6+ messages in thread
From: Jaume Martí @ 2014-06-22 14:55 UTC (permalink / raw)
  To: qemu-devel
  Cc: Peter Maydell, mtosatti, gleb, mst, riku.voipio, quintela,
	vrozenfe, anthony, pbonzini, alex.bennee, afaerber, rth

Hello,

The patch I provided no longer applies cleanly to the current HEAD, so
I am attaching a new patch. The code can also be pulled from
https://github.com/jmartif/qemu.git
As in my previous email, the patch fixes bugs 661696 and 1248376.
Please review and apply.

Best regards,
Jaume


Signed-off-by: Jaume Marti Farriol <jaume.martif@gmail.com>
diff --git a/linux-user/signal.c b/linux-user/signal.c
index f3b4378..1392207 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -865,9 +865,9 @@ static void setup_sigcontext(struct target_sigcontext *sc,
     __put_user(env->regs[R_ESP], &sc->esp_at_signal);
     __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);

-        cpu_x86_fsave(env, fpstate_addr, 1);
-        fpstate->status = fpstate->sw;
-        magic = 0xffff;
+    cpu_x86_fsave(env, fpstate_addr);
+    fpstate->status = fpstate->sw;
+    magic = 0xffff;
     __put_user(magic, &fpstate->magic);
     __put_user(fpstate_addr, &sc->fpstate);

@@ -1068,7 +1068,7 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc, int *peax)
                 if (!access_ok(VERIFY_READ, fpstate_addr,
                                sizeof(struct target_fpstate)))
                         goto badframe;
-                cpu_x86_frstor(env, fpstate_addr, 1);
+                cpu_x86_frstor(env, fpstate_addr);
  }

         *peax = tswapl(sc->eax);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index b5e1b41..8e00cd5 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -815,10 +815,14 @@ typedef struct CPUX86State {
     uint16_t fpuc;
     uint8_t fptags[8];   /* 0 = valid, 1 = empty */
     FPReg fpregs[8];
-    /* KVM-only so far */
-    uint16_t fpop;
+    union {
+        uint32_t tcg;
+        uint16_t kvm;
+    } fpop;
     uint64_t fpip;
     uint64_t fpdp;
+    uint32_t fpcs;
+    uint32_t fpds;

     /* emulator internal variables */
     float_status fp_status;
@@ -1063,8 +1067,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
 /* the following helpers are only usable in user mode simulation as
    they can trigger unexpected exceptions */
 void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
-void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
-void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);

 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index 1b2900d..ce0860e 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -56,6 +56,8 @@
 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)

+#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
+
 static inline void fpush(CPUX86State *env)
 {
     env->fpstt = (env->fpstt - 1) & 7;
@@ -604,6 +606,10 @@ void helper_fninit(CPUX86State *env)
     env->fptags[5] = 1;
     env->fptags[6] = 1;
     env->fptags[7] = 1;
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

 /* BCD ops */
@@ -961,13 +967,13 @@ void helper_fxam_ST0(CPUX86State *env)
     }
 }

-void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int fpus, fptag, exp, i;
+    int fptag, exp, i;
     uint64_t mant;
     CPU_LDoubleU tmp;

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 7; i >= 0; i--) {
         fptag <<= 2;
@@ -987,83 +993,150 @@ void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
             }
         }
     }
+
     if (data32) {
         /* 32 bit */
-        cpu_stl_data(env, ptr, env->fpuc);
-        cpu_stl_data(env, ptr + 4, fpus);
-        cpu_stl_data(env, ptr + 8, fptag);
-        cpu_stl_data(env, ptr + 12, 0); /* fpip */
-        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
-        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
-        cpu_stl_data(env, ptr + 24, 0); /* fpos */
+        cpu_stw_data(env, ptr, env->fpuc);
+        cpu_stw_data(env, ptr + 4, FPUS(env));
+        cpu_stw_data(env, ptr + 8, fptag);
+        if (protected_mode) {
+            cpu_stl_data(env, ptr + 12, env->fpip);
+            cpu_stl_data(env, ptr + 16,
+                        ((env->fpop.tcg & 0x7ff) << 16) | (env->fpcs & 0xffff));
+            cpu_stl_data(env, ptr + 20, env->fpdp);
+            cpu_stl_data(env, ptr + 24, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
+            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) << 12) |
+                        (env->fpop.tcg & 0x7ff))); /* fpip[31..16], fpop */
+            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
+            cpu_stl_data(env, ptr + 24,
+                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
+        }
     } else {
         /* 16 bit */
         cpu_stw_data(env, ptr, env->fpuc);
-        cpu_stw_data(env, ptr + 2, fpus);
+        cpu_stw_data(env, ptr + 2, FPUS(env));
         cpu_stw_data(env, ptr + 4, fptag);
-        cpu_stw_data(env, ptr + 6, 0);
-        cpu_stw_data(env, ptr + 8, 0);
-        cpu_stw_data(env, ptr + 10, 0);
-        cpu_stw_data(env, ptr + 12, 0);
+        if (protected_mode) {
+            cpu_stw_data(env, ptr + 6, env->fpip);
+            cpu_stw_data(env, ptr + 8, env->fpcs);
+            cpu_stw_data(env, ptr + 10, env->fpdp);
+            cpu_stw_data(env, ptr + 12, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
+            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
+                        (env->fpop.tcg & 0x7ff)); /* fpip[19..16], fpop */
+            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
+            cpu_stw_data(env, ptr + 12,
+                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
+        }
     }
+
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

-void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int i, fpus, fptag;
+    int tmp, i, fpus, fptag;

     if (data32) {
+        /* 32 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 4);
         fptag = cpu_lduw_data(env, ptr + 8);
+        if (protected_mode) {
+            env->fpip = cpu_ldl_data(env, ptr + 12);
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpcs = tmp & 0xffff;
+            env->fpop.tcg = tmp >> 16;
+            env->fpdp = cpu_ldl_data(env, ptr + 20);
+            env->fpds = cpu_lduw_data(env, ptr + 24);
+        } else {
+            /* Real mode */
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpip = ((tmp & 0xffff000) << 4) |
+                        cpu_lduw_data(env, ptr + 12);
+            env->fpop.tcg = tmp & 0x7ff;
+            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
+                        cpu_lduw_data(env, ptr + 20);
+        }
     } else {
+        /* 16 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 2);
         fptag = cpu_lduw_data(env, ptr + 4);
+        if (protected_mode) {
+            /* Protected mode  */
+            env->fpip = cpu_lduw_data(env, ptr + 6);
+            env->fpcs = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 10);
+            env->fpds = cpu_lduw_data(env, ptr + 12);
+        } else {
+            /* Real mode  */
+            tmp = cpu_lduw_data(env, ptr + 8);
+            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr + 6);
+            env->fpop.tcg = tmp & 0x7ff;
+            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
+                        cpu_lduw_data(env, ptr + 10);
+        }
     }
+
     env->fpstt = (fpus >> 11) & 7;
     env->fpus = fpus & ~0x3800;
     for (i = 0; i < 8; i++) {
         env->fptags[i] = ((fptag & 3) == 3);
         fptag >>= 2;
     }
+
+    env->fpip &= 0xffffffff;
+    env->fpdp &= 0xffffffff;
+    if (!protected_mode) {
+        env->fpcs = 0;
+        env->fpds = 0;
+    }
 }

-void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
+                  int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fstenv(env, ptr, data32);
+    helper_fstenv(env, ptr, data32, protected_mode);

-    ptr += (14 << data32);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }
     for (i = 0; i < 8; i++) {
         tmp = ST(i);
         helper_fstt(env, tmp, ptr);
         ptr += 10;
     }

-    /* fninit */
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
+    helper_fninit(env);
 }

-void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fldenv(env, ptr, data32);
-    ptr += (14 << data32);
+    helper_fldenv(env, ptr, data32, protected_mode);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }

     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, ptr);
@@ -1072,21 +1145,22 @@ void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
     }
 }

-#if defined(CONFIG_USER_ONLY)
-void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
+#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS == 32
+
+void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
 {
-    helper_fsave(env, ptr, data32);
+    helper_fsave(env, ptr, 1, 1);
 }

-void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
 {
-    helper_frstor(env, ptr, data32);
+    helper_frstor(env, ptr, 1, 1);
 }
 #endif

-void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
-    int fpus, fptag, i, nb_xmm_regs;
+    int i, nb_xmm_regs, fptag;
     floatx80 tmp;
     target_ulong addr;

@@ -1095,25 +1169,36 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
         raise_exception(env, EXCP0D_GPF);
     }

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 0; i < 8; i++) {
         fptag |= (env->fptags[i] << i);
     }
+    fptag ^= 0xff;
+
     cpu_stw_data(env, ptr, env->fpuc);
-    cpu_stw_data(env, ptr + 2, fpus);
-    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
+    cpu_stw_data(env, ptr + 2, FPUS(env));
+    cpu_stw_data(env, ptr + 4, fptag & 0xff);
+    cpu_stw_data(env, ptr + 6, env->fpop.tcg);
+
 #ifdef TARGET_X86_64
     if (data64) {
-        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
-        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
+        /* 64 bit */
+        cpu_stq_data(env, ptr + 8, env->fpip);
+        cpu_stq_data(env, ptr + 16, env->fpdp);
     } else
 #endif
     {
-        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
-        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
-        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
-        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
+        if (data32) {
+            /* 32 bit */
+            cpu_stl_data(env, ptr + 8, env->fpip);
+            cpu_stl_data(env, ptr + 16, env->fpdp);
+        } else {
+            /* 16 bit */
+            cpu_stw_data(env, ptr + 8, env->fpip);
+            cpu_stw_data(env, ptr + 16, env->fpdp);
+        }
+        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
+        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
     }

     addr = ptr + 0x20;
@@ -1146,7 +1231,7 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
     }
 }

-void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
     int i, fpus, fptag, nb_xmm_regs;
     floatx80 tmp;
@@ -1167,6 +1252,30 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
         env->fptags[i] = ((fptag >> i) & 1);
     }

+    env->fpop.tcg = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
+
+#ifdef TARGET_X86_64
+    if (data64) {
+        /* 64 bit */
+        env->fpip = cpu_ldq_data(env, ptr + 8);
+        env->fpdp = cpu_ldq_data(env, ptr + 16);
+    } else
+#endif
+    {
+        if (data32) {
+            /* 32 bit */
+            env->fpip = cpu_ldl_data(env, ptr + 8);
+            env->fpdp = cpu_ldl_data(env, ptr + 16);
+        } else {
+            /* 16 bit */
+            env->fpip = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 16);
+        }
+
+        env->fpcs = cpu_lduw_data(env, ptr + 12);
+        env->fpds = cpu_lduw_data(env, ptr + 20);
+    }
+
     addr = ptr + 0x20;
     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, addr);
@@ -1195,6 +1304,11 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
             }
         }
     }
+
+    if (!data64) {
+        env->fpip &= 0xffffffff;
+        env->fpdp &= 0xffffffff;
+    }
 }

 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 8eb0145..9c4fd22 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -183,12 +183,12 @@ DEF_HELPER_1(frndint, void, env)
 DEF_HELPER_1(fscale, void, env)
 DEF_HELPER_1(fsin, void, env)
 DEF_HELPER_1(fcos, void, env)
-DEF_HELPER_3(fstenv, void, env, tl, int)
-DEF_HELPER_3(fldenv, void, env, tl, int)
-DEF_HELPER_3(fsave, void, env, tl, int)
-DEF_HELPER_3(frstor, void, env, tl, int)
-DEF_HELPER_3(fxsave, void, env, tl, int)
-DEF_HELPER_3(fxrstor, void, env, tl, int)
+DEF_HELPER_4(fstenv, void, env, tl, int, int)
+DEF_HELPER_4(fldenv, void, env, tl, int, int)
+DEF_HELPER_4(fsave, void, env, tl, int, int)
+DEF_HELPER_4(frstor, void, env, tl, int, int)
+DEF_HELPER_4(fxsave, void, env, tl, int, int)
+DEF_HELPER_4(fxrstor, void, env, tl, int, int)

 DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 4bf0ac9..79a84ce 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -987,7 +987,7 @@ static int kvm_put_fpu(X86CPU *cpu)
     fpu.fsw = env->fpus & ~(7 << 11);
     fpu.fsw |= (env->fpstt & 7) << 11;
     fpu.fcw = env->fpuc;
-    fpu.last_opcode = env->fpop;
+    fpu.last_opcode = env->fpop.kvm;
     fpu.last_ip = env->fpip;
     fpu.last_dp = env->fpdp;
     for (i = 0; i < 8; ++i) {
@@ -1032,7 +1032,7 @@ static int kvm_put_xsave(X86CPU *cpu)
         twd |= (!env->fptags[i]) << i;
     }
     xsave->region[XSAVE_FCW_FSW] = (uint32_t)(swd << 16) + cwd;
-    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
+    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop.kvm << 16) + twd;
     memcpy(&xsave->region[XSAVE_CWD_RIP], &env->fpip, sizeof(env->fpip));
     memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
     memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
@@ -1298,7 +1298,7 @@ static int kvm_get_fpu(X86CPU *cpu)
     env->fpstt = (fpu.fsw >> 11) & 7;
     env->fpus = fpu.fsw;
     env->fpuc = fpu.fcw;
-    env->fpop = fpu.last_opcode;
+    env->fpop.kvm = fpu.last_opcode;
     env->fpip = fpu.last_ip;
     env->fpdp = fpu.last_dp;
     for (i = 0; i < 8; ++i) {
@@ -1330,7 +1330,7 @@ static int kvm_get_xsave(X86CPU *cpu)
     cwd = (uint16_t)xsave->region[XSAVE_FCW_FSW];
     swd = (uint16_t)(xsave->region[XSAVE_FCW_FSW] >> 16);
     twd = (uint16_t)xsave->region[XSAVE_FTW_FOP];
-    env->fpop = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
+    env->fpop.kvm = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
     env->fpstt = (swd >> 11) & 7;
     env->fpus = swd;
     env->fpuc = cwd;
diff --git a/target-i386/machine.c b/target-i386/machine.c
index b8dcd2f..70db6aa 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -389,7 +389,7 @@ static bool fpop_ip_dp_needed(void *opaque)
     X86CPU *cpu = opaque;
     CPUX86State *env = &cpu->env;

-    return env->fpop != 0 || env->fpip != 0 || env->fpdp != 0;
+    return env->fpop.kvm != 0 || env->fpip != 0 || env->fpdp != 0;
 }

 static const VMStateDescription vmstate_fpop_ip_dp = {
@@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT16(env.fpop, X86CPU),
+        VMSTATE_UINT16(env.fpop.kvm, X86CPU),
         VMSTATE_UINT64(env.fpip, X86CPU),
         VMSTATE_UINT64(env.fpdp, X86CPU),
         VMSTATE_END_OF_LIST()
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6fcd824..0d748ee 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -58,6 +58,7 @@
 #endif

 //#define MACRO_TEST   1
+#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)

 /* global register indexes */
 static TCGv_ptr cpu_env;
@@ -65,6 +66,11 @@ static TCGv cpu_A0;
 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
+static TCGv_i32 cpu_fpop;
+static TCGv cpu_fpip;
+static TCGv cpu_fpdp;
+static TCGv_i32 cpu_fpds;
+static TCGv_i32 cpu_fpcs;
 /* local temps */
 static TCGv cpu_T[2];
 /* local register indexes (only used inside old micro ops) */
@@ -208,6 +214,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_CLR] = 0,
 };

+static inline bool non_control_x87_instr(int modrm, int b)
+{
+    int op, mod, rm;
+    switch (b) {
+    case 0xd8 ... 0xdf:
+        /* floats */
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        if (mod != 3) {
+            /* memory */
+            switch (op) {
+            case 0x0c: /* fldenv */
+            case 0x0d: /* fldcw */
+            case 0x0e: /* fstenv, fnstenv */
+            case 0x0f: /* fstcw, fnstcw */
+            case 0x2c: /* frstor */
+            case 0x2e: /* fsave, fnsave */
+            case 0x2f: /* fstsw, fnstsw */
+                return false;
+            default:
+                return true;
+            }
+        } else {
+            /* register */
+            switch (op) {
+            case 0x0a:
+                return false; /* fnop, Illegal op */
+            case 0x0e: /* fdecstp, fincstp */
+            case 0x28: /* ffree */
+                return false;
+            case 0x1c:
+                switch (rm) {
+                case 1: /* feni */
+                    return true;
+                case 2: /* fclex, fnclex */
+                case 3: /* finit, fninit */
+                    return false;
+                case 4: /* fsetpm */
+                    return true;
+                default: /* Illegal op */
+                    return false;
+                }
+            case 0x3c:
+                return false; /* fstsw, fnstsw, Illegal op */
+            default:
+                return true;
+            }
+        }
+    /*case 0x9b: // fwait, wait
+        return false;*/
+    default:
+        return false;
+    }
+}
+
 static void set_cc_op(DisasContext *s, CCOp op)
 {
     int dead;
@@ -1863,7 +1925,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
     }
 }

-static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm, int b)
 {
     target_long disp;
     int havesib;
@@ -1871,6 +1933,7 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
     int index;
     int scale;
     int mod, rm, code, override, must_add_seg;
+    int non_control_float_instr;
     TCGv sum;

     override = s->override;
@@ -1950,6 +2013,13 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
             tcg_gen_addi_tl(cpu_A0, sum, disp);
         }

+        non_control_float_instr = non_control_x87_instr(modrm, b);
+        if (non_control_float_instr) {
+            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+            if (s->aflag == MO_32) {
+                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
+            }
+        }
         if (must_add_seg) {
             if (override < 0) {
                 if (base == R_EBP || base == R_ESP) {
@@ -1961,6 +2031,12 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)

             tcg_gen_ld_tl(cpu_tmp0, cpu_env,
                           offsetof(CPUX86State, segs[override].base));
+
+            if (non_control_float_instr) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
+
             if (CODE64(s)) {
                 if (s->aflag == MO_32) {
                     tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
@@ -1970,6 +2046,11 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
             }

             tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+        } else {
+            if (non_control_float_instr) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }

         if (s->aflag == MO_32) {
@@ -2039,8 +2120,22 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
                     override = R_DS;
                 }
             }
+            if (non_control_x87_instr(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
             gen_op_addl_A0_seg(s, override);
+        } else {
+            if (non_control_x87_instr(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }
+#ifdef TARGET_X86_64
+        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
+#endif
         break;

     default:
@@ -2130,7 +2225,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
    OR_TMP0 */
 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
-                           TCGMemOp ot, int reg, int is_store)
+                           TCGMemOp ot, int reg, int is_store, int b)
 {
     int mod, rm;

@@ -2147,7 +2242,7 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         }
     } else {
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_v_reg(ot, cpu_T[0], reg);
@@ -2250,7 +2345,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 {
     CCPrepare cc;

-    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

     cc = gen_prepare_cc(s, b, cpu_T[1]);
     if (cc.mask != -1) {
@@ -3043,7 +3138,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x0e7: /* movntq */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             break;
         case 0x1e7: /* movntdq */
@@ -3051,20 +3146,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12b: /* movntps */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x22b: /* movntss */
         case 0x32b: /* movntsd */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (b1 & 1) {
                 gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
@@ -3076,12 +3171,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
                 tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3091,14 +3186,14 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
                 tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3107,7 +3202,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3124,7 +3219,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16f: /* movdqa xmm, ea */
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3134,7 +3229,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3149,7 +3244,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3164,7 +3259,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x012: /* movlps */
         case 0x112: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3176,7 +3271,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x212: /* movsldup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3192,7 +3287,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3206,7 +3301,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3218,7 +3313,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x216: /* movshdup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3258,13 +3353,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (s->dflag == MO_64) {
                 tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x17e: /* movd ea, xmm */
@@ -3272,18 +3367,18 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (s->dflag == MO_64) {
                 tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3295,7 +3390,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3310,7 +3405,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x17f: /* movdqa ea, xmm */
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3320,7 +3415,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
             } else {
@@ -3331,7 +3426,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3343,7 +3438,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x013: /* movlps */
         case 0x113: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3353,7 +3448,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x017: /* movhps */
         case 0x117: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3417,7 +3512,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12a: /* cvtpi2pd */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -3440,7 +3535,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x22a: /* cvtsi2ss */
         case 0x32a: /* cvtsi2sd */
             ot = mo_64_32(s->dflag);
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
@@ -3462,7 +3557,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12d: /* cvtpd2pi */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 gen_ldo_env_A0(s, op2_offset);
             } else {
@@ -3493,7 +3588,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x32d: /* cvtsd2si */
             ot = mo_64_32(s->dflag);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
                 } else {
@@ -3525,7 +3620,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0xc4: /* pinsrw */
         case 0x1c4:
             s->rip_offset = 1;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
@@ -3559,7 +3654,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3626,7 +3721,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     switch (b) {
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
@@ -3660,7 +3755,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -3701,7 +3796,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }

                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                                  cpu_T[0], tcg_const_i32(8 << ot));

@@ -3729,7 +3824,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     ot = MO_64;
                 }

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b & 1) == 0) {
                     tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
                                        s->mem_index, ot | MO_BE);
@@ -3747,7 +3842,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 gen_op_update1_cc();
@@ -3764,7 +3859,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 {
                     TCGv bound, zero;

-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
                     tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
@@ -3801,7 +3896,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
@@ -3828,7 +3923,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 switch (ot) {
                 default:
                     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3854,7 +3949,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3872,7 +3967,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3892,7 +3987,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     int end_op;

                     ot = mo_64_32(s->dflag);
-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                     /* Re-use the carry-out from a previous round.  */
                     TCGV_UNUSED(carry_in);
@@ -3971,7 +4066,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 if (ot == MO_64) {
                     tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
                 } else {
@@ -4003,7 +4098,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
@@ -4062,7 +4157,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3)
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
@@ -4199,7 +4294,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldo_env_A0(s, op2_offset);
                 }
             } else {
@@ -4208,7 +4303,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -4242,7 +4337,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 b = cpu_ldub_code(env, s->pc++);
                 if (ot == MO_64) {
                     tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
@@ -4278,7 +4373,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             if (mod != 3) {
                 int sz = 4;

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);

                 switch (b) {
@@ -4326,7 +4421,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         } else {
             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -4595,7 +4690,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     opreg = OR_TMP0;
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
@@ -4616,7 +4711,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
@@ -4655,7 +4750,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     s->rip_offset = 1;
                 else
                     s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = rm;
@@ -4698,7 +4793,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (mod != 3) {
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -4906,7 +5001,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             }
         }
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
@@ -4998,7 +5093,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_v_reg(ot, cpu_T[1], reg);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
@@ -5073,7 +5168,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             s->rip_offset = insn_const_size(ot);
         else if (b == 0x6b)
             s->rip_offset = 1;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
             tcg_gen_movi_tl(cpu_T[1], val);
@@ -5130,7 +5225,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
             tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -5159,7 +5254,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_mov_tl(a0, cpu_A0);
                 gen_op_ld_v(s, ot, t0, a0);
                 rm = 0; /* avoid warning */
@@ -5207,16 +5302,16 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
-#endif
+#endif
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5266,7 +5361,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             s->popl_esp_hack = 0;
             gen_pop_update(s, ot);
         }
@@ -5352,7 +5447,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         reg = ((modrm >> 3) & 7) | rex_r;

         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
@@ -5361,7 +5456,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
         }
         val = insn_get(env, s, ot);
         tcg_gen_movi_tl(cpu_T[0], val);
@@ -5377,7 +5472,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         break;
     case 0x8e: /* mov seg, Gv */
@@ -5385,7 +5480,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
-        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace */
@@ -5408,7 +5503,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             goto illegal_op;
         gen_op_movl_T0_seg(reg);
         ot = mod == 3 ? dflag : MO_16;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
         break;

     case 0x1b6: /* movzbS Gv, Eb */
@@ -5450,7 +5545,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -5468,7 +5563,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         s->override = -1;
         val = s->addseg;
         s->addseg = 0;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         s->addseg = val;
         gen_op_mov_reg_v(ot, reg, cpu_A0);
         break;
@@ -5558,7 +5653,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
@@ -5593,7 +5688,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
@@ -5624,7 +5719,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 if (shift == 2) {
                     s->rip_offset = 1;
                 }
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = (modrm & 7) | REX_B(s);
@@ -5674,7 +5769,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             opreg = OR_TMP0;
         } else {
             opreg = rm;
@@ -5705,7 +5800,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -5832,7 +5927,9 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 0x0c: /* fldenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fldenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0d: /* fldcw mem */
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
@@ -5842,7 +5939,9 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 0x0e: /* fnstenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fstenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
@@ -5863,12 +5962,16 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
             case 0x2c: /* frstor mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_frstor(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2e: /* fnsave mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fsave(cpu_env, cpu_A0,
+                                 tcg_const_i32(dflag == MO_32),
+                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
@@ -6209,6 +6312,11 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 goto illegal_op;
             }
         }
+        if (non_control_x87_instr(modrm, b)) {
+            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm & 0xff));
+            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
+            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
+        }
         break;
         /************************/
         /* string ops */
@@ -6527,7 +6635,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = cpu_ldub_code(env, s->pc++);
         gen_setcc1(s, b, cpu_T[0]);
-        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         if (!(s->cpuid_features & CPUID_CMOV)) {
@@ -6657,7 +6765,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
             s->rip_offset = 1;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -6688,7 +6796,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* specific case: we need to add a displacement */
             gen_exts(ot, cpu_T[1]);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
@@ -6764,7 +6872,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_extu(ot, cpu_T[0]);

         /* Note that lzcnt and tzcnt are in different extensions.  */
@@ -6967,7 +7075,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (mod == 3)
             goto illegal_op;
         gen_op_mov_v_reg(ot, cpu_T[0], reg);
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         if (ot == MO_16) {
@@ -7149,7 +7257,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 2: /* lldt */
             if (!s->pe || s->vm86)
@@ -7158,7 +7266,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
@@ -7170,7 +7278,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,tr.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 3: /* ltr */
             if (!s->pe || s->vm86)
@@ -7179,7 +7287,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
@@ -7189,7 +7297,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         case 5: /* verw */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             gen_update_cc_op(s);
             if (op == 4) {
                 gen_helper_verr(cpu_env, cpu_T[0]);
@@ -7212,7 +7320,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             if (mod == 3)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, gdt.limit));
             gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
             gen_add_A0_im(s, 2);
@@ -7268,7 +7376,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
             } else { /* sidt */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State, idt.limit));
                 gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
                 gen_add_A0_im(s, 2);
@@ -7371,7 +7479,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             } else {
                 gen_svm_check_intercept(s, pc_start,
                                         op==2 ? SVM_EXIT_GDTR_WRITE : SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
                 gen_add_A0_im(s, 2);
                 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
@@ -7394,14 +7502,14 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
 #else
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
 #endif
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
             break;
         case 6: /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_helper_lmsw(cpu_env, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
@@ -7414,7 +7522,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 } else {
                     gen_update_cc_op(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_helper_invlpg(cpu_env, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
@@ -7493,7 +7601,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -7514,7 +7622,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, ot, t0, cpu_A0);
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, cpu_A0);
@@ -7556,7 +7664,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             ot = dflag != MO_16 ? MO_32 : MO_16;
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
@@ -7584,7 +7692,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
@@ -7696,7 +7804,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             goto illegal_op;
         reg = ((modrm >> 3) & 7) | rex_r;
         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0x1ae:
         modrm = cpu_ldub_code(env, s->pc++);
@@ -7711,10 +7819,12 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxsave(cpu_env, cpu_A0,
+                              tcg_const_i32(dflag == MO_32),
+                              tcg_const_i32(dflag == MO_64));
             break;
         case 1: /* fxrstor */
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
@@ -7724,10 +7834,12 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxrstor(cpu_env, cpu_A0,
+                               tcg_const_i32(dflag == MO_32),
+                               tcg_const_i32(dflag == MO_64));
             break;
         case 2: /* ldmxcsr */
         case 3: /* stmxcsr */
@@ -7738,7 +7850,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                 mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op == 2) {
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                     s->mem_index, MO_LEUL);
@@ -7763,7 +7875,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 /* clflush */
                 if (!(s->cpuid_features & CPUID_CLFLUSH))
                     goto illegal_op;
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
             }
             break;
         default:
@@ -7775,7 +7887,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
@@ -7803,7 +7915,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             ot = mo_64_32(dflag);
         }

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);

@@ -7880,6 +7992,17 @@ void optimize_flags_init(void)
     cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
                                      "cc_src2");

+    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
+                                      offsetof(CPUX86State, fpop.tcg), "fpop");
+    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
+                                     "fpip");
+    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
+                                     "fpdp");
+    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpds),
+                                     "fpds");
+    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpcs),
+                                     "fpcs");
+
     for (i = 0; i < CPU_NB_REGS; ++i) {
         cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
                                          offsetof(CPUX86State, regs[i]),

On Sat, Jun 21, 2014 at 2:16 AM, Jaume Martí <jaume.martif@gmail.com> wrote:
> Hello,
>
> I am submitting a patch to fix bugs 661696 and 1248376. For TCG, the
> patch implements the behaviour specified in the Intel and AMD
> programmer's manuals for the x87 exception pointers. That is, when
> fstenv/fnstenv, fsave or fxsave is executed, the instruction pointer,
> data pointer and opcode of the last non-control x87 instruction
> executed are saved to the specified memory address. When fldenv,
> frstor or fxrstor is executed, the values to be treated as the
> instruction pointer, data pointer and opcode of the last non-control
> x87 instruction are loaded from the specified memory address.
>
> Best regards,
> Jaume
>
> Signed-off-by: Jaume Marti Farriol <jaume.martif@gmail.com>
> diff --git a/include/exec/def-helper.h b/include/exec/def-helper.h
> index 73d51f9..9f31404 100644
> --- a/include/exec/def-helper.h
> +++ b/include/exec/def-helper.h
> @@ -8,7 +8,7 @@
>     to match the types used by the C helper implementation.
>
>     The target helper.h should be included in all files that use/define
> -   helper functions.  THis will ensure that function prototypes are
> +   helper functions.  This will ensure that function prototypes are
>     consistent.  In addition it should be included an extra two times for
>     helper.c, defining:
>      GEN_HELPER 1 to produce op generation functions (gen_helper_*)
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index 04638e2..0f3b573 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -687,52 +687,52 @@ struct target_xmmreg {
>  };
>
>  struct target_fpstate {
> - /* Regular FPU environment */
> -        abi_ulong       cw;
> -        abi_ulong       sw;
> -        abi_ulong       tag;
> -        abi_ulong       ipoff;
> -        abi_ulong       cssel;
> -        abi_ulong       dataoff;
> -        abi_ulong       datasel;
> - struct target_fpreg _st[8];
> - uint16_t status;
> - uint16_t magic; /* 0xffff = regular FPU data only */
> -
> - /* FXSR FPU environment */
> -        abi_ulong       _fxsr_env[6];   /* FXSR FPU env is ignored */
> -        abi_ulong       mxcsr;
> -        abi_ulong       reserved;
> - struct target_fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
> - struct target_xmmreg _xmm[8];
> -        abi_ulong       padding[56];
> +    /* Regular FPU environment */
> +    abi_ulong       cw;
> +    abi_ulong       sw;
> +    abi_ulong       tag;
> +    abi_ulong       ipoff;
> +    abi_ulong       cssel;
> +    abi_ulong       dataoff;
> +    abi_ulong       datasel;
> +    struct target_fpreg _st[8];
> +    uint16_t        status;
> +    uint16_t        magic; /* 0xffff = regular FPU data only */
> +
> +    /* FXSR FPU environment */
> +    abi_ulong       _fxsr_env[6]; /* FXSR FPU env is ignored */
> +    abi_ulong       mxcsr;
> +    abi_ulong       reserved;
> +    struct target_fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
> +    struct target_xmmreg _xmm[8];
> +    abi_ulong       padding[56];
>  };
>
>  #define X86_FXSR_MAGIC 0x0000
>
>  struct target_sigcontext {
> - uint16_t gs, __gsh;
> - uint16_t fs, __fsh;
> - uint16_t es, __esh;
> - uint16_t ds, __dsh;
> -        abi_ulong edi;
> -        abi_ulong esi;
> -        abi_ulong ebp;
> -        abi_ulong esp;
> -        abi_ulong ebx;
> -        abi_ulong edx;
> -        abi_ulong ecx;
> -        abi_ulong eax;
> -        abi_ulong trapno;
> -        abi_ulong err;
> -        abi_ulong eip;
> - uint16_t cs, __csh;
> -        abi_ulong eflags;
> -        abi_ulong esp_at_signal;
> - uint16_t ss, __ssh;
> -        abi_ulong fpstate; /* pointer */
> -        abi_ulong oldmask;
> -        abi_ulong cr2;
> +    uint16_t gs, __gsh;
> +    uint16_t fs, __fsh;
> +    uint16_t es, __esh;
> +    uint16_t ds, __dsh;
> +    abi_ulong edi;
> +    abi_ulong esi;
> +    abi_ulong ebp;
> +    abi_ulong esp;
> +    abi_ulong ebx;
> +    abi_ulong edx;
> +    abi_ulong ecx;
> +    abi_ulong eax;
> +    abi_ulong trapno;
> +    abi_ulong err;
> +    abi_ulong eip;
> +    uint16_t cs, __csh;
> +    abi_ulong eflags;
> +    abi_ulong esp_at_signal;
> +    uint16_t ss, __ssh;
> +    abi_ulong fpstate; /* pointer */
> +    abi_ulong oldmask;
> +    abi_ulong cr2;
>  };
>
>  struct target_ucontext {
> @@ -775,7 +775,7 @@ setup_sigcontext(struct target_sigcontext *sc, struct target_fpstate *fpstate,
>   CPUX86State *env, abi_ulong mask, abi_ulong fpstate_addr)
>  {
>   int err = 0;
> -        uint16_t magic;
> +    uint16_t magic;
>
>   /* already locked in setup_frame() */
>   err |= __put_user(env->segs[R_GS].selector, (unsigned int *)&sc->gs);
> @@ -798,11 +798,11 @@ setup_sigcontext(struct target_sigcontext *sc, struct target_fpstate *fpstate,
>   err |= __put_user(env->regs[R_ESP], &sc->esp_at_signal);
>   err |= __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);
>
> -        cpu_x86_fsave(env, fpstate_addr, 1);
> -        fpstate->status = fpstate->sw;
> -        magic = 0xffff;
> -        err |= __put_user(magic, &fpstate->magic);
> -        err |= __put_user(fpstate_addr, &sc->fpstate);
> +    cpu_x86_fsave(env, fpstate_addr);
> +    fpstate->status = fpstate->sw;
> +    magic = 0xffff;
> +    err |= __put_user(magic, &fpstate->magic);
> +    err |= __put_user(fpstate_addr, &sc->fpstate);
>
>   /* non-iBCS2 extensions.. */
>   err |= __put_user(mask, &sc->oldmask);
> @@ -889,10 +889,10 @@ static void setup_frame(int sig, struct target_sigaction *ka,
>   env->regs[R_ESP] = frame_addr;
>   env->eip = ka->_sa_handler;
>
> -        cpu_x86_load_seg(env, R_DS, __USER_DS);
> -        cpu_x86_load_seg(env, R_ES, __USER_DS);
> -        cpu_x86_load_seg(env, R_SS, __USER_DS);
> -        cpu_x86_load_seg(env, R_CS, __USER_CS);
> +    cpu_x86_load_seg(env, R_DS, __USER_DS);
> +    cpu_x86_load_seg(env, R_ES, __USER_DS);
> +    cpu_x86_load_seg(env, R_SS, __USER_DS);
> +    cpu_x86_load_seg(env, R_CS, __USER_CS);
>   env->eflags &= ~TF_MASK;
>
>   unlock_user_struct(frame, frame_addr, 1);
> @@ -969,10 +969,10 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
>   env->regs[R_ESP] = frame_addr;
>   env->eip = ka->_sa_handler;
>
> -        cpu_x86_load_seg(env, R_DS, __USER_DS);
> -        cpu_x86_load_seg(env, R_ES, __USER_DS);
> -        cpu_x86_load_seg(env, R_SS, __USER_DS);
> -        cpu_x86_load_seg(env, R_CS, __USER_CS);
> +    cpu_x86_load_seg(env, R_DS, __USER_DS);
> +    cpu_x86_load_seg(env, R_ES, __USER_DS);
> +    cpu_x86_load_seg(env, R_SS, __USER_DS);
> +    cpu_x86_load_seg(env, R_CS, __USER_CS);
>   env->eflags &= ~TF_MASK;
>
>   unlock_user_struct(frame, frame_addr, 1);
> @@ -989,43 +989,43 @@ give_sigsegv:
>  static int
>  restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc, int *peax)
>  {
> - unsigned int err = 0;
> -        abi_ulong fpstate_addr;
> -        unsigned int tmpflags;
> -
> -        cpu_x86_load_seg(env, R_GS, tswap16(sc->gs));
> -        cpu_x86_load_seg(env, R_FS, tswap16(sc->fs));
> -        cpu_x86_load_seg(env, R_ES, tswap16(sc->es));
> -        cpu_x86_load_seg(env, R_DS, tswap16(sc->ds));
> -
> -        env->regs[R_EDI] = tswapl(sc->edi);
> -        env->regs[R_ESI] = tswapl(sc->esi);
> -        env->regs[R_EBP] = tswapl(sc->ebp);
> -        env->regs[R_ESP] = tswapl(sc->esp);
> -        env->regs[R_EBX] = tswapl(sc->ebx);
> -        env->regs[R_EDX] = tswapl(sc->edx);
> -        env->regs[R_ECX] = tswapl(sc->ecx);
> -        env->eip = tswapl(sc->eip);
> -
> -        cpu_x86_load_seg(env, R_CS, lduw_p(&sc->cs) | 3);
> -        cpu_x86_load_seg(env, R_SS, lduw_p(&sc->ss) | 3);
> -
> -        tmpflags = tswapl(sc->eflags);
> -        env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
> -        // regs->orig_eax = -1; /* disable syscall checks */
> -
> -        fpstate_addr = tswapl(sc->fpstate);
> - if (fpstate_addr != 0) {
> -                if (!access_ok(VERIFY_READ, fpstate_addr,
> -                               sizeof(struct target_fpstate)))
> -                        goto badframe;
> -                cpu_x86_frstor(env, fpstate_addr, 1);
> - }
> +    unsigned int err = 0;
> +    abi_ulong fpstate_addr;
> +    unsigned int tmpflags;
> +
> +    cpu_x86_load_seg(env, R_GS, tswap16(sc->gs));
> +    cpu_x86_load_seg(env, R_FS, tswap16(sc->fs));
> +    cpu_x86_load_seg(env, R_ES, tswap16(sc->es));
> +    cpu_x86_load_seg(env, R_DS, tswap16(sc->ds));
> +
> +    env->regs[R_EDI] = tswapl(sc->edi);
> +    env->regs[R_ESI] = tswapl(sc->esi);
> +    env->regs[R_EBP] = tswapl(sc->ebp);
> +    env->regs[R_ESP] = tswapl(sc->esp);
> +    env->regs[R_EBX] = tswapl(sc->ebx);
> +    env->regs[R_EDX] = tswapl(sc->edx);
> +    env->regs[R_ECX] = tswapl(sc->ecx);
> +    env->eip = tswapl(sc->eip);
> +
> +    cpu_x86_load_seg(env, R_CS, lduw_p(&sc->cs) | 3);
> +    cpu_x86_load_seg(env, R_SS, lduw_p(&sc->ss) | 3);
> +
> +    tmpflags = tswapl(sc->eflags);
> +    env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
> +
> +    fpstate_addr = tswapl(sc->fpstate);
> +    if (fpstate_addr != 0) {
> +        if (!access_ok(VERIFY_READ, fpstate_addr,
> +                    sizeof(struct target_fpstate))) {
> +            goto badframe;
> +        }
> +        cpu_x86_frstor(env, fpstate_addr);
> +    }
>
> -        *peax = tswapl(sc->eax);
> - return err;
> +    *peax = tswapl(sc->eax);
> +    return err;
>  badframe:
> - return 1;
> +    return 1;
>  }
>
>  long do_sigreturn(CPUX86State *env)
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 0014acc..b239cae 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -803,10 +803,14 @@ typedef struct CPUX86State {
>      uint16_t fpuc;
>      uint8_t fptags[8];   /* 0 = valid, 1 = empty */
>      FPReg fpregs[8];
> -    /* KVM-only so far */
> -    uint16_t fpop;
> +    union {
> +        uint32_t tcg;
> +        uint16_t kvm;
> +    } fpop;
>      uint64_t fpip;
>      uint64_t fpdp;
> +    uint32_t fpcs;
> +    uint32_t fpds;
>
>      /* emulator internal variables */
>      float_status fp_status;
> @@ -1049,8 +1053,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
>  /* the following helpers are only usable in user mode simulation as
>     they can trigger unexpected exceptions */
>  void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
> -void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
> -void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
> +void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
> +void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);
>
>  /* you can call this signal handler from your SIGBUS and SIGSEGV
>     signal handlers to inform the virtual CPU of exceptions. non zero
> diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
> index de7ba76..c80cce7 100644
> --- a/target-i386/fpu_helper.c
> +++ b/target-i386/fpu_helper.c
> @@ -59,6 +59,8 @@
>  #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
>  #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
>
> +#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
> +
>  static inline void fpush(CPUX86State *env)
>  {
>      env->fpstt = (env->fpstt - 1) & 7;
> @@ -607,6 +609,10 @@ void helper_fninit(CPUX86State *env)
>      env->fptags[5] = 1;
>      env->fptags[6] = 1;
>      env->fptags[7] = 1;
> +    env->fpip = 0;
> +    env->fpcs = 0;
> +    env->fpdp = 0;
> +    env->fpds = 0;
>  }
>
>  /* BCD ops */
> @@ -964,13 +970,13 @@ void helper_fxam_ST0(CPUX86State *env)
>      }
>  }
>
> -void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
> -    int fpus, fptag, exp, i;
> +    int fptag, exp, i;
>      uint64_t mant;
>      CPU_LDoubleU tmp;
>
> -    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
>      fptag = 0;
>      for (i = 7; i >= 0; i--) {
>          fptag <<= 2;
> @@ -990,83 +996,150 @@ void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
>              }
>          }
>      }
> +
>      if (data32) {
>          /* 32 bit */
> -        cpu_stl_data(env, ptr, env->fpuc);
> -        cpu_stl_data(env, ptr + 4, fpus);
> -        cpu_stl_data(env, ptr + 8, fptag);
> -        cpu_stl_data(env, ptr + 12, 0); /* fpip */
> -        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
> -        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
> -        cpu_stl_data(env, ptr + 24, 0); /* fpos */
> +        cpu_stw_data(env, ptr, env->fpuc);
> +        cpu_stw_data(env, ptr + 4, FPUS(env));
> +        cpu_stw_data(env, ptr + 8, fptag);
> +        if (protected_mode) {
> +            cpu_stl_data(env, ptr + 12, env->fpip);
> +            cpu_stl_data(env, ptr + 16,
> +                        ((env->fpop.tcg & 0x7ff) << 16) | (env->fpcs & 0xffff));
> +            cpu_stl_data(env, ptr + 20, env->fpdp);
> +            cpu_stl_data(env, ptr + 24, env->fpds);
> +        } else {
> +            /* Real mode  */
> +            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
> +            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) << 12) |
> +                        (env->fpop.tcg & 0x7ff))); /* fpip[31..16], fpop */
> +            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
> +            cpu_stl_data(env, ptr + 24,
> +                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
> +        }
>      } else {
>          /* 16 bit */
>          cpu_stw_data(env, ptr, env->fpuc);
> -        cpu_stw_data(env, ptr + 2, fpus);
> +        cpu_stw_data(env, ptr + 2, FPUS(env));
>          cpu_stw_data(env, ptr + 4, fptag);
> -        cpu_stw_data(env, ptr + 6, 0);
> -        cpu_stw_data(env, ptr + 8, 0);
> -        cpu_stw_data(env, ptr + 10, 0);
> -        cpu_stw_data(env, ptr + 12, 0);
> +        if (protected_mode) {
> +            cpu_stw_data(env, ptr + 6, env->fpip);
> +            cpu_stw_data(env, ptr + 8, env->fpcs);
> +            cpu_stw_data(env, ptr + 10, env->fpdp);
> +            cpu_stw_data(env, ptr + 12, env->fpds);
> +        } else {
> +            /* Real mode  */
> +            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
> +            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
> +                        (env->fpop.tcg & 0x7ff)); /* fpip[19..16], fpop */
> +            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
> +            cpu_stw_data(env, ptr + 12,
> +                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
> +        }
>      }
> +
> +    env->fpip = 0;
> +    env->fpcs = 0;
> +    env->fpdp = 0;
> +    env->fpds = 0;
>  }
>
> -void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
> -    int i, fpus, fptag;
> +    int tmp, i, fpus, fptag;
>
>      if (data32) {
> +        /* 32 bit */
>          env->fpuc = cpu_lduw_data(env, ptr);
>          fpus = cpu_lduw_data(env, ptr + 4);
>          fptag = cpu_lduw_data(env, ptr + 8);
> +        if (protected_mode) {
> +            env->fpip = cpu_ldl_data(env, ptr + 12);
> +            tmp = cpu_ldl_data(env, ptr + 16);
> +            env->fpcs = tmp & 0xffff;
> +            env->fpop.tcg = tmp >> 16;
> +            env->fpdp = cpu_ldl_data(env, ptr + 20);
> +            env->fpds = cpu_lduw_data(env, ptr + 24);
> +        } else {
> +            /* Real mode */
> +            tmp = cpu_ldl_data(env, ptr + 16);
> +            env->fpip = ((tmp & 0xffff000) << 4) |
> +                        cpu_lduw_data(env, ptr + 12);
> +            env->fpop.tcg = tmp & 0x7ff;
> +            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
> +                        cpu_lduw_data(env, ptr + 20);
> +        }
>      } else {
> +        /* 16 bit */
>          env->fpuc = cpu_lduw_data(env, ptr);
>          fpus = cpu_lduw_data(env, ptr + 2);
>          fptag = cpu_lduw_data(env, ptr + 4);
> +        if (protected_mode) {
> +            /* Protected mode  */
> +            env->fpip = cpu_lduw_data(env, ptr + 6);
> +            env->fpcs = cpu_lduw_data(env, ptr + 8);
> +            env->fpdp = cpu_lduw_data(env, ptr + 10);
> +            env->fpds = cpu_lduw_data(env, ptr + 12);
> +        } else {
> +            /* Real mode  */
> +            tmp = cpu_lduw_data(env, ptr + 8);
> +            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr + 6);
> +            env->fpop.tcg = tmp & 0x7ff;
> +            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
> +                        cpu_lduw_data(env, ptr + 10);
> +        }
>      }
> +
>      env->fpstt = (fpus >> 11) & 7;
>      env->fpus = fpus & ~0x3800;
>      for (i = 0; i < 8; i++) {
>          env->fptags[i] = ((fptag & 3) == 3);
>          fptag >>= 2;
>      }
> +
> +    env->fpip &= 0xffffffff;
> +    env->fpdp &= 0xffffffff;
> +    if (!protected_mode) {
> +        env->fpcs = 0;
> +        env->fpds = 0;
> +    }
>  }
>
> -void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
> +                  int protected_mode)
>  {
>      floatx80 tmp;
>      int i;
>
> -    helper_fstenv(env, ptr, data32);
> +    helper_fstenv(env, ptr, data32, protected_mode);
>
> -    ptr += (14 << data32);
> +    if (data32) {
> +        ptr += 28;
> +    } else {
> +        ptr += 14;
> +    }
>      for (i = 0; i < 8; i++) {
>          tmp = ST(i);
>          helper_fstt(env, tmp, ptr);
>          ptr += 10;
>      }
>
> -    /* fninit */
> -    env->fpus = 0;
> -    env->fpstt = 0;
> -    env->fpuc = 0x37f;
> -    env->fptags[0] = 1;
> -    env->fptags[1] = 1;
> -    env->fptags[2] = 1;
> -    env->fptags[3] = 1;
> -    env->fptags[4] = 1;
> -    env->fptags[5] = 1;
> -    env->fptags[6] = 1;
> -    env->fptags[7] = 1;
> +    helper_fninit(env);
>  }
>
> -void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
>      floatx80 tmp;
>      int i;
>
> -    helper_fldenv(env, ptr, data32);
> -    ptr += (14 << data32);
> +    helper_fldenv(env, ptr, data32, protected_mode);
> +    if (data32) {
> +        ptr += 28;
> +    } else {
> +        ptr += 14;
> +    }
>
>      for (i = 0; i < 8; i++) {
>          tmp = helper_fldt(env, ptr);
> @@ -1075,21 +1148,22 @@ void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
>      }
>  }
>
> -#if defined(CONFIG_USER_ONLY)
> -void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
> +#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS == 32
> +
> +void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
>  {
> -    helper_fsave(env, ptr, data32);
> +    helper_fsave(env, ptr, 1, 1);
>  }
>
> -void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
> +void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
>  {
> -    helper_frstor(env, ptr, data32);
> +    helper_frstor(env, ptr, 1, 1);
>  }
>  #endif
>
> -void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
> +void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int data64)
>  {
> -    int fpus, fptag, i, nb_xmm_regs;
> +    int i, nb_xmm_regs, fptag;
>      floatx80 tmp;
>      target_ulong addr;
>
> @@ -1098,25 +1172,36 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
>          raise_exception(env, EXCP0D_GPF);
>      }
>
> -    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
>      fptag = 0;
>      for (i = 0; i < 8; i++) {
>          fptag |= (env->fptags[i] << i);
>      }
> +    fptag ^= 0xff;
> +
>      cpu_stw_data(env, ptr, env->fpuc);
> -    cpu_stw_data(env, ptr + 2, fpus);
> -    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
> +    cpu_stw_data(env, ptr + 2, FPUS(env));
> +    cpu_stw_data(env, ptr + 4, fptag & 0xff);
> +    cpu_stw_data(env, ptr + 6, env->fpop.tcg);
> +
>  #ifdef TARGET_X86_64
>      if (data64) {
> -        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
> -        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
> +        /* 64 bit */
> +        cpu_stq_data(env, ptr + 8, env->fpip);
> +        cpu_stq_data(env, ptr + 16, env->fpdp);
>      } else
>  #endif
>      {
> -        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
> -        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
> -        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
> -        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
> +        if (data32) {
> +            /* 32 bit */
> +            cpu_stl_data(env, ptr + 8, env->fpip);
> +            cpu_stl_data(env, ptr + 16, env->fpdp);
> +        } else {
> +            /* 16 bit */
> +            cpu_stw_data(env, ptr + 8, env->fpip);
> +            cpu_stw_data(env, ptr + 16, env->fpdp);
> +        }
> +        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
> +        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
>      }
>
>      addr = ptr + 0x20;
> @@ -1149,7 +1234,7 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
>      }
>  }
>
> -void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
> +void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int data64)
>  {
>      int i, fpus, fptag, nb_xmm_regs;
>      floatx80 tmp;
> @@ -1170,6 +1255,30 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
>          env->fptags[i] = ((fptag >> i) & 1);
>      }
>
> +    env->fpop.tcg = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
> +
> +#ifdef TARGET_X86_64
> +    if (data64) {
> +        /* 64 bit */
> +        env->fpip = cpu_ldq_data(env, ptr + 8);
> +        env->fpdp = cpu_ldq_data(env, ptr + 16);
> +    } else
> +#endif
> +    {
> +        if (data32) {
> +            /* 32 bit */
> +            env->fpip = cpu_ldl_data(env, ptr + 8);
> +            env->fpdp = cpu_ldl_data(env, ptr + 16);
> +        } else {
> +            /* 16 bit */
> +            env->fpip = cpu_lduw_data(env, ptr + 8);
> +            env->fpdp = cpu_lduw_data(env, ptr + 16);
> +        }
> +
> +        env->fpcs = cpu_lduw_data(env, ptr + 12);
> +        env->fpds = cpu_lduw_data(env, ptr + 20);
> +    }
> +
>      addr = ptr + 0x20;
>      for (i = 0; i < 8; i++) {
>          tmp = helper_fldt(env, addr);
> @@ -1198,6 +1307,11 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
>              }
>          }
>      }
> +
> +    if (!data64) {
> +        env->fpip &= 0xffffffff;
> +        env->fpdp &= 0xffffffff;
> +    }
>  }
>
>  void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
> diff --git a/target-i386/helper.h b/target-i386/helper.h
> index 3775abe..626b296 100644
> --- a/target-i386/helper.h
> +++ b/target-i386/helper.h
> @@ -185,12 +185,12 @@ DEF_HELPER_1(frndint, void, env)
>  DEF_HELPER_1(fscale, void, env)
>  DEF_HELPER_1(fsin, void, env)
>  DEF_HELPER_1(fcos, void, env)
> -DEF_HELPER_3(fstenv, void, env, tl, int)
> -DEF_HELPER_3(fldenv, void, env, tl, int)
> -DEF_HELPER_3(fsave, void, env, tl, int)
> -DEF_HELPER_3(frstor, void, env, tl, int)
> -DEF_HELPER_3(fxsave, void, env, tl, int)
> -DEF_HELPER_3(fxrstor, void, env, tl, int)
> +DEF_HELPER_4(fstenv, void, env, tl, int, int)
> +DEF_HELPER_4(fldenv, void, env, tl, int, int)
> +DEF_HELPER_4(fsave, void, env, tl, int, int)
> +DEF_HELPER_4(frstor, void, env, tl, int, int)
> +DEF_HELPER_4(fxsave, void, env, tl, int, int)
> +DEF_HELPER_4(fxrstor, void, env, tl, int, int)
>
>  DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
>  DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index e555040..8444779 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -975,7 +975,7 @@ static int kvm_put_fpu(X86CPU *cpu)
>      fpu.fsw = env->fpus & ~(7 << 11);
>      fpu.fsw |= (env->fpstt & 7) << 11;
>      fpu.fcw = env->fpuc;
> -    fpu.last_opcode = env->fpop;
> +    fpu.last_opcode = env->fpop.kvm;
>      fpu.last_ip = env->fpip;
>      fpu.last_dp = env->fpdp;
>      for (i = 0; i < 8; ++i) {
> @@ -1020,7 +1020,7 @@ static int kvm_put_xsave(X86CPU *cpu)
>          twd |= (!env->fptags[i]) << i;
>      }
>      xsave->region[XSAVE_FCW_FSW] = (uint32_t)(swd << 16) + cwd;
> -    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop << 16) + twd;
> +    xsave->region[XSAVE_FTW_FOP] = (uint32_t)(env->fpop.kvm << 16) + twd;
>      memcpy(&xsave->region[XSAVE_CWD_RIP], &env->fpip, sizeof(env->fpip));
>      memcpy(&xsave->region[XSAVE_CWD_RDP], &env->fpdp, sizeof(env->fpdp));
>      memcpy(&xsave->region[XSAVE_ST_SPACE], env->fpregs,
> @@ -1286,7 +1286,7 @@ static int kvm_get_fpu(X86CPU *cpu)
>      env->fpstt = (fpu.fsw >> 11) & 7;
>      env->fpus = fpu.fsw;
>      env->fpuc = fpu.fcw;
> -    env->fpop = fpu.last_opcode;
> +    env->fpop.kvm = fpu.last_opcode;
>      env->fpip = fpu.last_ip;
>      env->fpdp = fpu.last_dp;
>      for (i = 0; i < 8; ++i) {
> @@ -1318,7 +1318,7 @@ static int kvm_get_xsave(X86CPU *cpu)
>      cwd = (uint16_t)xsave->region[XSAVE_FCW_FSW];
>      swd = (uint16_t)(xsave->region[XSAVE_FCW_FSW] >> 16);
>      twd = (uint16_t)xsave->region[XSAVE_FTW_FOP];
> -    env->fpop = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
> +    env->fpop.kvm = (uint16_t)(xsave->region[XSAVE_FTW_FOP] >> 16);
>      env->fpstt = (swd >> 11) & 7;
>      env->fpus = swd;
>      env->fpuc = cwd;
> diff --git a/target-i386/machine.c b/target-i386/machine.c
> index d548c05..a879e00 100644
> --- a/target-i386/machine.c
> +++ b/target-i386/machine.c
> @@ -388,7 +388,7 @@ static bool fpop_ip_dp_needed(void *opaque)
>      X86CPU *cpu = opaque;
>      CPUX86State *env = &cpu->env;
>
> -    return env->fpop != 0 || env->fpip != 0 || env->fpdp != 0;
> +    return env->fpop.kvm != 0 || env->fpip != 0 || env->fpdp != 0;
>  }
>
>  static const VMStateDescription vmstate_fpop_ip_dp = {
> @@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
>      .minimum_version_id = 1,
>      .minimum_version_id_old = 1,
>      .fields      = (VMStateField []) {
> -        VMSTATE_UINT16(env.fpop, X86CPU),
> +        VMSTATE_UINT16(env.fpop.kvm, X86CPU),
>          VMSTATE_UINT64(env.fpip, X86CPU),
>          VMSTATE_UINT64(env.fpdp, X86CPU),
>          VMSTATE_END_OF_LIST()
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 707ebd5..8d29931 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -58,6 +58,7 @@
>  #endif
>
>  //#define MACRO_TEST   1
> +#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)
>
>  /* global register indexes */
>  static TCGv_ptr cpu_env;
> @@ -65,6 +66,11 @@ static TCGv cpu_A0;
>  static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
>  static TCGv_i32 cpu_cc_op;
>  static TCGv cpu_regs[CPU_NB_REGS];
> +static TCGv_i32 cpu_fpop;
> +static TCGv cpu_fpip;
> +static TCGv cpu_fpdp;
> +static TCGv_i32 cpu_fpds;
> +static TCGv_i32 cpu_fpcs;
>  /* local temps */
>  static TCGv cpu_T[2];
>  /* local register indexes (only used inside old micro ops) */
> @@ -208,6 +214,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
>      [CC_OP_CLR] = 0,
>  };
>
> +static inline bool non_control_x87_instr(int modrm, int b)
> +{
> +    int op, mod, rm;
> +    switch (b) {
> +    case 0xd8 ... 0xdf:
> +        /* floats */
> +        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
> +        mod = (modrm >> 6) & 3;
> +        rm = modrm & 7;
> +        if (mod != 3) {
> +            /* memory */
> +            switch (op) {
> +            case 0x0c: /* fldenv */
> +            case 0x0d: /* fldcw */
> +            case 0x0e: /* fstenv, fnstenv */
> +            case 0x0f: /* fstcw, fnstcw */
> +            case 0x2c: /* frstor */
> +            case 0x2e: /* fsave, fnsave */
> +            case 0x2f: /* fstsw, fnstsw */
> +                return false;
> +            default:
> +                return true;
> +            }
> +        } else {
> +            /* register */
> +            switch (op) {
> +            case 0x0a:
> +                return false; /* fnop, Illegal op */
> +            case 0x0e: /* fdecstp, fincstp */
> +            case 0x28: /* ffree */
> +                return false;
> +            case 0x1c:
> +                switch (rm) {
> +                case 1: /* feni */
> +                    return true;
> +                case 2: /* fclex, fnclex */
> +                case 3: /* finit, fninit */
> +                    return false;
> +                case 4: /* fsetpm */
> +                    return true;
> +                default: /* Illegal op */
> +                    return false;
> +                }
> +            case 0x3c:
> +                return false; /* fstsw, fnstsw, Illegal op */
> +            default:
> +                return true;
> +            }
> +        }
> +    /*case 0x9b: // fwait, wait
> +        return false;*/
> +    default:
> +        return false;
> +    }
> +}
> +
>  static void set_cc_op(DisasContext *s, CCOp op)
>  {
>      int dead;
> @@ -1588,14 +1650,14 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
>      t0 = tcg_const_i32(0);
>      t1 = tcg_temp_new_i32();
>      tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
> -    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
> +    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
>      tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
>      tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
>                          cpu_tmp2_i32, cpu_tmp3_i32);
>      tcg_temp_free_i32(t0);
>      tcg_temp_free_i32(t1);
>
> -    /* The CC_OP value is no longer predictable.  */
> +    /* The CC_OP value is no longer predictable.  */
>      set_cc_op(s, CC_OP_DYNAMIC);
>  }
>
> @@ -1871,7 +1933,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
>      }
>  }
>
> -static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
> +static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm, int b)
>  {
>      target_long disp;
>      int havesib;
> @@ -1879,6 +1941,7 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
>      int index;
>      int scale;
>      int mod, rm, code, override, must_add_seg;
> +    int non_control_float_instr;
>      TCGv sum;
>
>      override = s->override;
> @@ -1958,6 +2021,13 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
>              tcg_gen_addi_tl(cpu_A0, sum, disp);
>          }
>
> +        non_control_float_instr = non_control_x87_instr(modrm, b);
> +        if (non_control_float_instr) {
> +            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +            if (s->aflag == MO_32) {
> +                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
> +            }
> +        }
>          if (must_add_seg) {
>              if (override < 0) {
>                  if (base == R_EBP || base == R_ESP) {
> @@ -1969,6 +2039,12 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>
>              tcg_gen_ld_tl(cpu_tmp0, cpu_env,
>                            offsetof(CPUX86State, segs[override].base));
> +
> +            if (non_control_float_instr) {
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[override].selector));
> +            }
> +
>              if (CODE64(s)) {
>                  if (s->aflag == MO_32) {
>                      tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
> @@ -1978,6 +2054,11 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>              }
>
>              tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> +        } else {
> +            if (non_control_float_instr) {
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[R_DS].selector));
> +            }
>          }
>
>          if (s->aflag == MO_32) {
> @@ -2047,8 +2128,22 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>                      override = R_DS;
>                  }
>              }
> +            if (non_control_x87_instr(modrm, b)) {
> +                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[override].selector));
> +            }
>              gen_op_addl_A0_seg(s, override);
> +        } else {
> +            if (non_control_x87_instr(modrm, b)) {
> +                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[R_DS].selector));
> +            }
>          }
> +#ifdef TARGET_X86_64
> +        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
> +#endif
>          break;
>
>      default:
> @@ -2138,7 +2233,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
>  /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
>     OR_TMP0 */
>  static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
> -                           TCGMemOp ot, int reg, int is_store)
> +                           TCGMemOp ot, int reg, int is_store, int b)
>  {
>      int mod, rm;
>
> @@ -2155,7 +2250,7 @@ static void gen_ldst_modrm(CPUX86State *env,
> DisasContext *s, int modrm,
>                  gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>          }
>      } else {
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          if (is_store) {
>              if (reg != OR_TMP0)
>                  gen_op_mov_v_reg(ot, cpu_T[0], reg);
> @@ -2258,7 +2353,7 @@ static void gen_cmovcc1(CPUX86State *env,
> DisasContext *s, TCGMemOp ot, int b,
>  {
>      CCPrepare cc;
>
> -    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>      cc = gen_prepare_cc(s, b, cpu_T[1]);
>      if (cc.mask != -1) {
> @@ -2284,17 +2379,17 @@ static void gen_cmovcc1(CPUX86State *env,
> DisasContext *s, TCGMemOp ot, int b,
>
>  static inline void gen_op_movl_T0_seg(int seg_reg)
>  {
> -    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> +    tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
>                       offsetof(CPUX86State,segs[seg_reg].selector));
>  }
>
>  static inline void gen_op_movl_seg_T0_vm(int seg_reg)
>  {
>      tcg_gen_andi_tl(cpu_T[0], cpu_T[0], 0xffff);
> -    tcg_gen_st32_tl(cpu_T[0], cpu_env,
> +    tcg_gen_st32_tl(cpu_T[0], cpu_env,
>                      offsetof(CPUX86State,segs[seg_reg].selector));
>      tcg_gen_shli_tl(cpu_T[0], cpu_T[0], 4);
> -    tcg_gen_st_tl(cpu_T[0], cpu_env,
> +    tcg_gen_st_tl(cpu_T[0], cpu_env,
>                    offsetof(CPUX86State,segs[seg_reg].base));
>  }
>
> @@ -3051,7 +3146,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x0e7: /* movntq */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              break;
>          case 0x1e7: /* movntdq */
> @@ -3059,20 +3154,20 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12b: /* movntps */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              break;
>          case 0x3f0: /* lddqu */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              break;
>          case 0x22b: /* movntss */
>          case 0x32b: /* movntsd */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (b1 & 1) {
>                  gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
> @@ -3084,13 +3179,13 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x6e: /* movd mm, ea */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
>                  tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
>              } else
>  #endif
>              {
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,fpregs[reg].mmx));
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
> @@ -3099,15 +3194,15 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x16e: /* movd xmm, ea */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
>                  gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
>              } else
>  #endif
>              {
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
> @@ -3115,7 +3210,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x6f: /* movq mm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              } else {
>                  rm = (modrm & 7);
> @@ -3132,7 +3227,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x16f: /* movdqa xmm, ea */
>          case 0x26f: /* movdqu xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3142,7 +3237,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x210: /* movss xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
>                  tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
>                  tcg_gen_movi_tl(cpu_T[0], 0);
> @@ -3157,7 +3252,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x310: /* movsd xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>                  tcg_gen_movi_tl(cpu_T[0], 0);
> @@ -3172,7 +3267,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x012: /* movlps */
>          case 0x112: /* movlpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3184,7 +3279,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x212: /* movsldup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3200,7 +3295,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x312: /* movddup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3214,7 +3309,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x016: /* movhps */
>          case 0x116: /* movhpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(1)));
>              } else {
> @@ -3226,7 +3321,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x216: /* movshdup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3264,34 +3359,34 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x7e: /* movd ea, mm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T[0], cpu_env,
> +                tcg_gen_ld_i64(cpu_T[0], cpu_env,
>                                 offsetof(CPUX86State,fpregs[reg].mmx));
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> +                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
>                                   offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
>              }
>              break;
>          case 0x17e: /* movd ea, xmm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T[0], cpu_env,
> +                tcg_gen_ld_i64(cpu_T[0], cpu_env,
>                                 offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> +                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
>              }
>              break;
>          case 0x27e: /* movq xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3303,7 +3398,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x7f: /* movq ea, mm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              } else {
>                  rm = (modrm & 7);
> @@ -3318,7 +3413,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x17f: /* movdqa ea, xmm */
>          case 0x27f: /* movdqu ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3328,7 +3423,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x211: /* movss ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
>                  gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
>              } else {
> @@ -3339,7 +3434,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x311: /* movsd ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3351,7 +3446,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x013: /* movlps */
>          case 0x113: /* movlpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3361,7 +3456,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x017: /* movhps */
>          case 0x117: /* movhpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(1)));
>              } else {
> @@ -3409,14 +3504,14 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x050: /* movmskps */
>              rm = (modrm & 7) | REX_B(s);
> -            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                               offsetof(CPUX86State,xmm_regs[rm]));
>              gen_helper_movmskps(cpu_tmp2_i32, cpu_env, cpu_ptr0);
>              tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
>              break;
>          case 0x150: /* movmskpd */
>              rm = (modrm & 7) | REX_B(s);
> -            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +            tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                               offsetof(CPUX86State,xmm_regs[rm]));
>              gen_helper_movmskpd(cpu_tmp2_i32, cpu_env, cpu_ptr0);
>              tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
> @@ -3425,7 +3520,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12a: /* cvtpi2pd */
>              gen_helper_enter_mmx(cpu_env);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,mmx_t0);
>                  gen_ldq_env_A0(s, op2_offset);
>              } else {
> @@ -3448,7 +3543,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x22a: /* cvtsi2ss */
>          case 0x32a: /* cvtsi2sd */
>              ot = mo_64_32(s->dflag);
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>              op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
>              tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
>              if (ot == MO_32) {
> @@ -3470,7 +3565,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12d: /* cvtpd2pi */
>              gen_helper_enter_mmx(cpu_env);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>                  gen_ldo_env_A0(s, op2_offset);
>              } else {
> @@ -3501,7 +3596,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x32d: /* cvtsd2si */
>              ot = mo_64_32(s->dflag);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  if ((b >> 8) & 1) {
>                      gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
>                  } else {
> @@ -3533,7 +3628,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0xc4: /* pinsrw */
>          case 0x1c4:
>              s->rip_offset = 1;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              val = cpu_ldub_code(env, s->pc++);
>              if (b1) {
>                  val &= 7;
> @@ -3567,7 +3662,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x1d6: /* movq ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3634,7 +3729,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,xmm_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      switch (b) {
>                      case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
>                      case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
> @@ -3668,7 +3763,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,mmx_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldq_env_A0(s, op2_offset);
>                  }
>              }
> @@ -3709,7 +3804,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  }
>
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
>                                   cpu_T[0], tcg_const_i32(8 << ot));
>
> @@ -3737,7 +3832,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      ot = MO_64;
>                  }
>
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  if ((b & 1) == 0) {
>                      tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
>                                         s->mem_index, ot | MO_BE);
> @@ -3755,7 +3850,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
>                  gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>                  gen_op_update1_cc();
> @@ -3772,7 +3867,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  {
>                      TCGv bound, zero;
>
> -                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                      /* Extract START, and shift the operand.
>                         Shifts larger than operand size get zeros.  */
>                      tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
> @@ -3809,7 +3904,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
>                  {
>                      TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
> @@ -3836,7 +3931,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  switch (ot) {
>                  default:
>                      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> @@ -3862,7 +3957,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> @@ -3880,7 +3975,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> @@ -3900,7 +3995,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      int end_op;
>
>                      ot = mo_64_32(s->dflag);
> -                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>                      /* Re-use the carry-out from a previous round.  */
>                      TCGV_UNUSED(carry_in);
> @@ -3979,7 +4074,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  if (ot == MO_64) {
>                      tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
>                  } else {
> @@ -4011,7 +4106,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> @@ -4070,7 +4165,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3)
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                  reg = ((modrm >> 3) & 7) | rex_r;
>                  val = cpu_ldub_code(env, s->pc++);
>                  switch (b) {
> @@ -4207,7 +4302,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,xmm_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldo_env_A0(s, op2_offset);
>                  }
>              } else {
> @@ -4216,7 +4311,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,mmx_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldq_env_A0(s, op2_offset);
>                  }
>              }
> @@ -4250,7 +4345,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  b = cpu_ldub_code(env, s->pc++);
>                  if (ot == MO_64) {
>                      tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
> @@ -4286,7 +4381,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              if (mod != 3) {
>                  int sz = 4;
>
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>
>                  switch (b) {
> @@ -4334,7 +4429,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          } else {
>              op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,mmx_t0);
>                  gen_ldq_env_A0(s, op2_offset);
>              } else {
> @@ -4603,7 +4698,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  mod = (modrm >> 6) & 3;
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      opreg = OR_TMP0;
>                  } else if (op == OP_XORL && rm == reg) {
>                  xor_zero:
> @@ -4624,7 +4719,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  reg = ((modrm >> 3) & 7) | rex_r;
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>                  } else if (op == OP_XORL && rm == reg) {
>                      goto xor_zero;
> @@ -4663,7 +4758,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      s->rip_offset = 1;
>                  else
>                      s->rip_offset = insn_const_size(ot);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  opreg = OR_TMP0;
>              } else {
>                  opreg = rm;
> @@ -4706,7 +4801,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (mod != 3) {
>              if (op == 0)
>                  s->rip_offset = insn_const_size(ot);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T[0], rm);
> @@ -4914,7 +5009,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              }
>          }
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (op >= 2 && op != 3 && op != 5)
>                  gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
> @@ -5006,7 +5101,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_op_mov_v_reg(ot, cpu_T[1], reg);
>          gen_op_testl_T0_T1_cc();
>          set_cc_op(s, CC_OP_LOGICB + ot);
> @@ -5081,7 +5176,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              s->rip_offset = insn_const_size(ot);
>          else if (b == 0x6b)
>              s->rip_offset = 1;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          if (b == 0x69) {
>              val = insn_get(env, s, ot);
>              tcg_gen_movi_tl(cpu_T[1], val);
> @@ -5138,7 +5233,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_op_mov_reg_v(ot, reg, cpu_T[1]);
>              gen_op_mov_reg_v(ot, rm, cpu_T[0]);
>          } else {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_mov_v_reg(ot, cpu_T[0], reg);
>              gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>              tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> @@ -5167,7 +5262,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_mov_v_reg(ot, t0, rm);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_mov_tl(a0, cpu_A0);
>                  gen_op_ld_v(s, ot, t0, a0);
>                  rm = 0; /* avoid warning */
> @@ -5215,16 +5310,16 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  goto illegal_op;
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_update_cc_op(s);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_helper_cmpxchg16b(cpu_env, cpu_A0);
>          } else
> -#endif
> +#endif
>          {
>              if (!(s->cpuid_features & CPUID_CX8))
>                  goto illegal_op;
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_update_cc_op(s);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_helper_cmpxchg8b(cpu_env, cpu_A0);
>          }
>          set_cc_op(s, CC_OP_EFLAGS);
> @@ -5274,7 +5369,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          } else {
>              /* NOTE: order is important too for MMU exceptions */
>              s->popl_esp_hack = 1 << ot;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              s->popl_esp_hack = 0;
>              gen_pop_update(s, ot);
>          }
> @@ -5360,7 +5455,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          /* generate a generic store */
> -        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
>          break;
>      case 0xc6:
>      case 0xc7: /* mov Ev, Iv */
> @@ -5369,7 +5464,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod != 3) {
>              s->rip_offset = insn_const_size(ot);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>          }
>          val = insn_get(env, s, ot);
>          tcg_gen_movi_tl(cpu_T[0], val);
> @@ -5385,7 +5480,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>          break;
>      case 0x8e: /* mov seg, Gv */
> @@ -5393,7 +5488,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          reg = (modrm >> 3) & 7;
>          if (reg >= 6 || reg == R_CS)
>              goto illegal_op;
> -        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>          gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
>          if (reg == R_SS) {
>              /* if reg == SS, inhibit interrupts/trace */
> @@ -5416,7 +5511,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              goto illegal_op;
>          gen_op_movl_T0_seg(reg);
>          ot = mod == 3 ? dflag : MO_16;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>          break;
>
>      case 0x1b6: /* movzbS Gv, Eb */
> @@ -5458,7 +5553,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              }
> @@ -5476,7 +5571,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          s->override = -1;
>          val = s->addseg;
>          s->addseg = 0;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          s->addseg = val;
>          gen_op_mov_reg_v(ot, reg, cpu_A0);
>          break;
> @@ -5566,7 +5661,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_op_mov_reg_v(ot, rm, cpu_T[0]);
>              gen_op_mov_reg_v(ot, reg, cpu_T[1]);
>          } else {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_mov_v_reg(ot, cpu_T[0], reg);
>              /* for xchg, lock is implicit */
>              if (!(prefixes & PREFIX_LOCK))
> @@ -5601,7 +5696,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>          gen_add_A0_im(s, 1 << ot);
>          /* load the segment first to handle exceptions properly */
> @@ -5632,7 +5727,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  if (shift == 2) {
>                      s->rip_offset = 1;
>                  }
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  opreg = OR_TMP0;
>              } else {
>                  opreg = (modrm & 7) | REX_B(s);
> @@ -5682,7 +5777,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          reg = ((modrm >> 3) & 7) | rex_r;
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              opreg = OR_TMP0;
>          } else {
>              opreg = rm;
> @@ -5713,7 +5808,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          op = ((b & 7) << 3) | ((modrm >> 3) & 7);
>          if (mod != 3) {
>              /* memory op */
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              switch(op) {
>              case 0x00 ... 0x07: /* fxxxs */
>              case 0x10 ... 0x17: /* fixxxl */
> @@ -5840,7 +5935,9 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              case 0x0c: /* fldenv mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fldenv(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x0d: /* fldcw mem */
>                  tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> @@ -5850,7 +5947,9 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              case 0x0e: /* fnstenv mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fstenv(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x0f: /* fnstcw mem */
>                  gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
> @@ -5871,12 +5970,16 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>              case 0x2c: /* frstor mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_frstor(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x2e: /* fnsave mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
> +                gen_helper_fsave(cpu_env, cpu_A0,
> +                                 tcg_const_i32(dflag == MO_32),
> +                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x2f: /* fnstsw mem */
>                  gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
> @@ -6217,6 +6320,11 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  goto illegal_op;
>              }
>          }
> +        if (non_control_x87_instr(modrm, b)) {
> +            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm & 0xff));
> +            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
> +            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
> +        }
>          break;
>          /************************/
>          /* string ops */
> @@ -6276,7 +6384,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      case 0x6d:
>          ot = mo_b_d32(b, dflag);
>          tcg_gen_ext16u_tl(cpu_T[0], cpu_regs[R_EDX]);
> -        gen_check_io(s, ot, pc_start - s->cs_base,
> +        gen_check_io(s, ot, pc_start - s->cs_base,
>                       SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4);
>          if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) {
>              gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
> @@ -6535,7 +6643,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      case 0x190 ... 0x19f: /* setcc Gv */
>          modrm = cpu_ldub_code(env, s->pc++);
>          gen_setcc1(s, b, cpu_T[0]);
> -        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
> +        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
>          break;
>      case 0x140 ... 0x14f: /* cmov Gv, Ev */
>          if (!(s->cpuid_features & CPUID_CMOV)) {
> @@ -6665,7 +6773,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          if (mod != 3) {
>              s->rip_offset = 1;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T[0], rm);
> @@ -6696,7 +6804,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              /* specific case: we need to add a displacement */
>              gen_exts(ot, cpu_T[1]);
>              tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
> @@ -6750,7 +6858,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          ot = dflag;
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_extu(ot, cpu_T[0]);
>
>          /* Note that lzcnt and tzcnt are in different extensions.  */
> @@ -6953,7 +7061,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (mod == 3)
>              goto illegal_op;
>          gen_op_mov_v_reg(ot, cpu_T[0], reg);
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          gen_jmp_im(pc_start - s->cs_base);
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>          if (ot == MO_16) {
> @@ -7135,7 +7243,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State,ldt.selector));
>              ot = mod == 3 ? dflag : MO_16;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              break;
>          case 2: /* lldt */
>              if (!s->pe || s->vm86)
> @@ -7144,7 +7252,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_lldt(cpu_env, cpu_tmp2_i32);
> @@ -7156,7 +7264,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State,tr.selector));
>              ot = mod == 3 ? dflag : MO_16;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              break;
>          case 3: /* ltr */
>              if (!s->pe || s->vm86)
> @@ -7165,7 +7273,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_ltr(cpu_env, cpu_tmp2_i32);
> @@ -7175,7 +7283,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          case 5: /* verw */
>              if (!s->pe || s->vm86)
>                  goto illegal_op;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              gen_update_cc_op(s);
>              if (op == 4) {
>                  gen_helper_verr(cpu_env, cpu_T[0]);
> @@ -7198,7 +7306,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              if (mod == 3)
>                  goto illegal_op;
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
> gdt.limit));
>              gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
>              gen_add_A0_im(s, 2);
> @@ -7254,7 +7362,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>              } else { /* sidt */
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State, idt.limit));
>                  gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
>                  gen_add_A0_im(s, 2);
> @@ -7311,7 +7419,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      break;
>                  case 4: /* STGI */
>                      if ((!(s->flags & HF_SVME_MASK) &&
> -                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
> +                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
>                          !s->pe)
>                          goto illegal_op;
>                      if (s->cpl != 0) {
> @@ -7332,8 +7440,8 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      }
>                      break;
>                  case 6: /* SKINIT */
> -                    if ((!(s->flags & HF_SVME_MASK) &&
> -                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
> +                    if ((!(s->flags & HF_SVME_MASK) &&
> +                         !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) ||
>                          !s->pe)
>                          goto illegal_op;
>                      gen_helper_skinit(cpu_env);
> @@ -7357,7 +7465,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              } else {
>                  gen_svm_check_intercept(s, pc_start,
>                                          op==2 ? SVM_EXIT_GDTR_WRITE :
> SVM_EXIT_IDTR_WRITE);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
>                  gen_add_A0_im(s, 2);
>                  gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
> @@ -7380,14 +7488,14 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>  #else
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
>  #endif
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
>              break;
>          case 6: /* lmsw */
>              if (s->cpl != 0) {
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_helper_lmsw(cpu_env, cpu_T[0]);
>                  gen_jmp_im(s->pc - s->cs_base);
>                  gen_eob(s);
> @@ -7400,7 +7508,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  } else {
>                      gen_update_cc_op(s);
>                      gen_jmp_im(pc_start - s->cs_base);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_helper_invlpg(cpu_env, cpu_A0);
>                      gen_jmp_im(s->pc - s->cs_base);
>                      gen_eob(s);
> @@ -7479,7 +7587,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              }
> @@ -7500,7 +7608,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              mod = (modrm >> 6) & 3;
>              rm = modrm & 7;
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, ot, t0, cpu_A0);
>                  a0 = tcg_temp_local_new();
>                  tcg_gen_mov_tl(a0, cpu_A0);
> @@ -7542,7 +7650,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              ot = dflag != MO_16 ? MO_32 : MO_16;
>              modrm = cpu_ldub_code(env, s->pc++);
>              reg = ((modrm >> 3) & 7) | rex_r;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              t0 = tcg_temp_local_new();
>              gen_update_cc_op(s);
>              if (b == 0x102) {
> @@ -7570,7 +7678,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          case 3: /* prefetchnt0 */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              /* nothing more to do */
>              break;
>          default: /* nop (multi byte) */
> @@ -7682,7 +7790,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              goto illegal_op;
>          reg = ((modrm >> 3) & 7) | rex_r;
>          /* generate a generic store */
> -        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
>          break;
>      case 0x1ae:
>          modrm = cpu_ldub_code(env, s->pc++);
> @@ -7697,10 +7805,12 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
>                  break;
>              }
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_update_cc_op(s);
>              gen_jmp_im(pc_start - s->cs_base);
> -            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
> +            gen_helper_fxsave(cpu_env, cpu_A0,
> +                              tcg_const_i32(dflag == MO_32),
> +                              tcg_const_i32(dflag == MO_64));
>              break;
>          case 1: /* fxrstor */
>              if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
> @@ -7710,10 +7820,12 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
>                  break;
>              }
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_update_cc_op(s);
>              gen_jmp_im(pc_start - s->cs_base);
> -            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
> +            gen_helper_fxrstor(cpu_env, cpu_A0,
> +                               tcg_const_i32(dflag == MO_32),
> +                               tcg_const_i32(dflag == MO_64));
>              break;
>          case 2: /* ldmxcsr */
>          case 3: /* stmxcsr */
> @@ -7724,7 +7836,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
>                  mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (op == 2) {
>                  tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
>                                      s->mem_index, MO_LEUL);
> @@ -7749,7 +7861,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  /* clflush */
>                  if (!(s->cpuid_features & CPUID_CLFLUSH))
>                      goto illegal_op;
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>              }
>              break;
>          default:
> @@ -7761,7 +7873,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          /* ignore for now */
>          break;
>      case 0x1aa: /* rsm */
> @@ -7789,7 +7901,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              ot = mo_64_32(dflag);
>          }
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
>          gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>
> @@ -7866,6 +7978,17 @@ void optimize_flags_init(void)
>      cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
>                                       "cc_src2");
>
> +    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
> +                                      offsetof(CPUX86State, fpop.tcg), "fpop");
> +    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
> +                                     "fpip");
> +    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
> +                                     "fpdp");
> +    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpds),
> +                                     "fpds");
> +    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpcs),
> +                                     "fpcs");
> +
>      for (i = 0; i < CPU_NB_REGS; ++i) {
>          cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
>                                           offsetof(CPUX86State, regs[i]),



-- 
Jaume

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.
  2014-06-22 14:55 ` Jaume Martí
@ 2014-06-22 18:55   ` Richard Henderson
  2014-06-22 19:17     ` Jaume Martí
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Henderson @ 2014-06-22 18:55 UTC (permalink / raw)
  To: Jaume Martí, qemu-devel
  Cc: Peter Maydell, mtosatti, gleb, mst, riku.voipio, quintela,
	vrozenfe, anthony, pbonzini, alex.bennee, afaerber

On 06/22/2014 07:55 AM, Jaume Martí wrote:
> -        cpu_x86_fsave(env, fpstate_addr, 1);
> -        fpstate->status = fpstate->sw;
> -        magic = 0xffff;
> +    cpu_x86_fsave(env, fpstate_addr);
> +    fpstate->status = fpstate->sw;
> +    magic = 0xffff;

This patch needs to be split into format fixes and the actual change to be
reviewed.

> -    /* KVM-only so far */
> -    uint16_t fpop;
> +    union {
> +        uint32_t tcg;
> +        uint16_t kvm;
> +    } fpop;

This is highly questionable.

>      .fields = (VMStateField[]) {
> -        VMSTATE_UINT16(env.fpop, X86CPU),
> +        VMSTATE_UINT16(env.fpop.kvm, X86CPU),

You're breaking save/restore in tcg.  KVM is not required for migration.

> +        if (non_control_x87_instr(modrm, b)) {
> +            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm & 0xff));
> +            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
> +            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
> +        }

I strongly suspect you can implement this feature without having to add 3
(largely redundant) register writes to every x87 instruction executed.

See how restore_state_to_opc works to compute the value of CC_OP during
translation.  You can do the same thing to recover these three values.

You do have to sync these values before normal exits from the TB, but you only
have to do that once, not once for every insn executed.  See gen_update_cc_op.


r~

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.
  2014-06-22 18:55   ` Richard Henderson
@ 2014-06-22 19:17     ` Jaume Martí
  2014-07-19  0:36       ` Jaume Martí
  0 siblings, 1 reply; 6+ messages in thread
From: Jaume Martí @ 2014-06-22 19:17 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Peter Maydell, mtosatti, gleb, mst, riku.voipio, qemu-devel,
	quintela, vrozenfe, anthony, pbonzini, alex.bennee, afaerber

Thanks Richard for your feedback. I am going to correct the patch and
resubmit it.

Best regards,
Jaume

On Sun, Jun 22, 2014 at 8:55 PM, Richard Henderson <rth@twiddle.net> wrote:
> On 06/22/2014 07:55 AM, Jaume Martí wrote:
>> -        cpu_x86_fsave(env, fpstate_addr, 1);
>> -        fpstate->status = fpstate->sw;
>> -        magic = 0xffff;
>> +    cpu_x86_fsave(env, fpstate_addr);
>> +    fpstate->status = fpstate->sw;
>> +    magic = 0xffff;
>
> This patch needs to be split into format fixes and the actual change to be
> reviewed.
>
>> -    /* KVM-only so far */
>> -    uint16_t fpop;
>> +    union {
>> +        uint32_t tcg;
>> +        uint16_t kvm;
>> +    } fpop;
>
> This is highly questionable.
>
>>      .fields = (VMStateField[]) {
>> -        VMSTATE_UINT16(env.fpop, X86CPU),
>> +        VMSTATE_UINT16(env.fpop.kvm, X86CPU),
>
> You're breaking save/restore in tcg.  KVM is not required for migration.
>
>> +        if (non_control_x87_instr(modrm, b)) {
>> +            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm & 0xff));
>> +            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
>> +            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
>> +        }
>
> I strongly suspect you can implement this feature without having to add 3
> (largely redundant) register writes to every x87 instruction executed.
>
> See how restore_state_to_opc works to compute the value of CC_OP during
> translation.  You can do the same thing to recover these three values.
>
> You do have to sync these values before normal exits from the TB, but you only
> have to do that once, not once for every insn executed.  See gen_update_cc_op.
>
>
> r~

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.
  2014-06-22 19:17     ` Jaume Martí
@ 2014-07-19  0:36       ` Jaume Martí
  2014-07-21 18:55         ` Jaume Martí
  0 siblings, 1 reply; 6+ messages in thread
From: Jaume Martí @ 2014-07-19  0:36 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Peter Maydell, mtosatti, gleb, mst, riku.voipio, qemu-devel,
	quintela, vrozenfe, anthony, pbonzini, alex.bennee, afaerber

Hello,

I attach a patch with the fix for the issues pointed out by Richard.
Maybe it would be useful to have the option to disable this feature
at compile time, for performance reasons.
Please review and apply.

Best regards,
Jaume

Signed-off-by: Jaume Marti Farriol (jaume.martif@gmail.com)
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 1141054..73f8f6b 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -865,7 +865,7 @@ static void setup_sigcontext(struct target_sigcontext *sc,
     __put_user(env->regs[R_ESP], &sc->esp_at_signal);
     __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);

-        cpu_x86_fsave(env, fpstate_addr, 1);
+        cpu_x86_fsave(env, fpstate_addr);
         fpstate->status = fpstate->sw;
         magic = 0xffff;
     __put_user(magic, &fpstate->magic);
@@ -1068,7 +1068,7 @@ restore_sigcontext(CPUX86State *env, struct
target_sigcontext *sc, int *peax)
                 if (!access_ok(VERIFY_READ, fpstate_addr,
                                sizeof(struct target_fpstate)))
                         goto badframe;
-                cpu_x86_frstor(env, fpstate_addr, 1);
+                cpu_x86_frstor(env, fpstate_addr);
  }

         *peax = tswapl(sc->eax);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index e634d83..4274ce3 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -819,10 +819,11 @@ typedef struct CPUX86State {
     uint16_t fpuc;
     uint8_t fptags[8];   /* 0 = valid, 1 = empty */
     FPReg fpregs[8];
-    /* KVM-only so far */
-    uint16_t fpop;
+    uint32_t fpop;
     uint64_t fpip;
     uint64_t fpdp;
+    uint32_t fpcs;
+    uint32_t fpds;

     /* emulator internal variables */
     float_status fp_status;
@@ -1067,8 +1068,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
 /* the following helpers are only usable in user mode simulation as
    they can trigger unexpected exceptions */
 void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
-void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
-void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);

 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index 1b2900d..6886031 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -56,6 +56,8 @@
 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)

+#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
+
 static inline void fpush(CPUX86State *env)
 {
     env->fpstt = (env->fpstt - 1) & 7;
@@ -604,6 +606,10 @@ void helper_fninit(CPUX86State *env)
     env->fptags[5] = 1;
     env->fptags[6] = 1;
     env->fptags[7] = 1;
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

 /* BCD ops */
@@ -961,13 +967,13 @@ void helper_fxam_ST0(CPUX86State *env)
     }
 }

-void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int fpus, fptag, exp, i;
+    int fptag, exp, i;
     uint64_t mant;
     CPU_LDoubleU tmp;

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 7; i >= 0; i--) {
         fptag <<= 2;
@@ -987,83 +993,150 @@ void helper_fstenv(CPUX86State *env,
target_ulong ptr, int data32)
             }
         }
     }
+
     if (data32) {
         /* 32 bit */
-        cpu_stl_data(env, ptr, env->fpuc);
-        cpu_stl_data(env, ptr + 4, fpus);
-        cpu_stl_data(env, ptr + 8, fptag);
-        cpu_stl_data(env, ptr + 12, 0); /* fpip */
-        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
-        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
-        cpu_stl_data(env, ptr + 24, 0); /* fpos */
+        cpu_stw_data(env, ptr, env->fpuc);
+        cpu_stw_data(env, ptr + 4, FPUS(env));
+        cpu_stw_data(env, ptr + 8, fptag);
+        if (protected_mode) {
+            cpu_stl_data(env, ptr + 12, env->fpip);
+            cpu_stl_data(env, ptr + 16,
+                        ((env->fpop & 0x7ff) << 16) | (env->fpcs & 0xffff));
+            cpu_stl_data(env, ptr + 20, env->fpdp);
+            cpu_stl_data(env, ptr + 24, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
+            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) << 12) |
+                        (env->fpop & 0x7ff))); /* fpip[31..16], fpop */
+            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
+            cpu_stl_data(env, ptr + 24,
+                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
+        }
     } else {
         /* 16 bit */
         cpu_stw_data(env, ptr, env->fpuc);
-        cpu_stw_data(env, ptr + 2, fpus);
+        cpu_stw_data(env, ptr + 2, FPUS(env));
         cpu_stw_data(env, ptr + 4, fptag);
-        cpu_stw_data(env, ptr + 6, 0);
-        cpu_stw_data(env, ptr + 8, 0);
-        cpu_stw_data(env, ptr + 10, 0);
-        cpu_stw_data(env, ptr + 12, 0);
+        if (protected_mode) {
+            cpu_stw_data(env, ptr + 6, env->fpip);
+            cpu_stw_data(env, ptr + 8, env->fpcs);
+            cpu_stw_data(env, ptr + 10, env->fpdp);
+            cpu_stw_data(env, ptr + 12, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
+            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
+                        (env->fpop & 0x7ff)); /* fpip[19..16], fpop */
+            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
+            cpu_stw_data(env, ptr + 12,
+                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
+        }
     }
+
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

-void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int i, fpus, fptag;
+    int tmp, i, fpus, fptag;

     if (data32) {
+        /* 32 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 4);
         fptag = cpu_lduw_data(env, ptr + 8);
+        if (protected_mode) {
+            env->fpip = cpu_ldl_data(env, ptr + 12);
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpcs = tmp & 0xffff;
+            env->fpop = tmp >> 16;
+            env->fpdp = cpu_ldl_data(env, ptr + 20);
+            env->fpds = cpu_lduw_data(env, ptr + 24);
+        } else {
+            /* Real mode */
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpip = ((tmp & 0xffff000) << 4) |
+                        cpu_lduw_data(env, ptr + 12);
+            env->fpop = tmp & 0x7ff;
+            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
+                        cpu_lduw_data(env, ptr + 20);
+        }
     } else {
+        /* 16 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 2);
         fptag = cpu_lduw_data(env, ptr + 4);
+        if (protected_mode) {
+            /* Protected mode  */
+            env->fpip = cpu_lduw_data(env, ptr + 6);
+            env->fpcs = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 10);
+            env->fpds = cpu_lduw_data(env, ptr + 12);
+        } else {
+            /* Real mode  */
+            tmp = cpu_lduw_data(env, ptr + 8);
+            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr + 6);
+            env->fpop = tmp & 0x7ff;
+            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
+                        cpu_lduw_data(env, ptr + 10);
+        }
     }
+
     env->fpstt = (fpus >> 11) & 7;
     env->fpus = fpus & ~0x3800;
     for (i = 0; i < 8; i++) {
         env->fptags[i] = ((fptag & 3) == 3);
         fptag >>= 2;
     }
+
+    env->fpip &= 0xffffffff;
+    env->fpdp &= 0xffffffff;
+    if (!protected_mode) {
+        env->fpcs = 0;
+        env->fpds = 0;
+    }
 }

-void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
+                  int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fstenv(env, ptr, data32);
+    helper_fstenv(env, ptr, data32, protected_mode);

-    ptr += (14 << data32);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }
     for (i = 0; i < 8; i++) {
         tmp = ST(i);
         helper_fstt(env, tmp, ptr);
         ptr += 10;
     }

-    /* fninit */
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
+    helper_fninit(env);
 }

-void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fldenv(env, ptr, data32);
-    ptr += (14 << data32);
+    helper_fldenv(env, ptr, data32, protected_mode);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }

     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, ptr);
@@ -1072,21 +1145,22 @@ void helper_frstor(CPUX86State *env,
target_ulong ptr, int data32)
     }
 }

-#if defined(CONFIG_USER_ONLY)
-void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
+#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS == 32
+
+void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
 {
-    helper_fsave(env, ptr, data32);
+    helper_fsave(env, ptr, 1, 1);
 }

-void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
 {
-    helper_frstor(env, ptr, data32);
+    helper_frstor(env, ptr, 1, 1);
 }
 #endif

-void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
-    int fpus, fptag, i, nb_xmm_regs;
+    int i, nb_xmm_regs, fptag;
     floatx80 tmp;
     target_ulong addr;

@@ -1095,25 +1169,36 @@ void helper_fxsave(CPUX86State *env,
target_ulong ptr, int data64)
         raise_exception(env, EXCP0D_GPF);
     }

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 0; i < 8; i++) {
         fptag |= (env->fptags[i] << i);
     }
+    fptag ^= 0xff;
+
     cpu_stw_data(env, ptr, env->fpuc);
-    cpu_stw_data(env, ptr + 2, fpus);
-    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
+    cpu_stw_data(env, ptr + 2, FPUS(env));
+    cpu_stw_data(env, ptr + 4, fptag & 0xff);
+    cpu_stw_data(env, ptr + 6, env->fpop);
+
 #ifdef TARGET_X86_64
     if (data64) {
-        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
-        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
+        /* 64 bit */
+        cpu_stq_data(env, ptr + 8, env->fpip);
+        cpu_stq_data(env, ptr + 16, env->fpdp);
     } else
 #endif
     {
-        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
-        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
-        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
-        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
+        if (data32) {
+            /* 32 bit */
+            cpu_stl_data(env, ptr + 8, env->fpip);
+            cpu_stl_data(env, ptr + 16, env->fpdp);
+        } else {
+            /* 16 bit */
+            cpu_stw_data(env, ptr + 8, env->fpip);
+            cpu_stw_data(env, ptr + 16, env->fpdp);
+        }
+        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
+        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
     }

     addr = ptr + 0x20;
@@ -1146,7 +1231,7 @@ void helper_fxsave(CPUX86State *env,
target_ulong ptr, int data64)
     }
 }

-void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
     int i, fpus, fptag, nb_xmm_regs;
     floatx80 tmp;
@@ -1167,6 +1252,30 @@ void helper_fxrstor(CPUX86State *env,
target_ulong ptr, int data64)
         env->fptags[i] = ((fptag >> i) & 1);
     }

+    env->fpop = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
+
+#ifdef TARGET_X86_64
+    if (data64) {
+        /* 64 bit */
+        env->fpip = cpu_ldq_data(env, ptr + 8);
+        env->fpdp = cpu_ldq_data(env, ptr + 16);
+    } else
+#endif
+    {
+        if (data32) {
+            /* 32 bit */
+            env->fpip = cpu_ldl_data(env, ptr + 8);
+            env->fpdp = cpu_ldl_data(env, ptr + 16);
+        } else {
+            /* 16 bit */
+            env->fpip = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 16);
+        }
+
+        env->fpcs = cpu_lduw_data(env, ptr + 12);
+        env->fpds = cpu_lduw_data(env, ptr + 20);
+    }
+
     addr = ptr + 0x20;
     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, addr);
@@ -1195,6 +1304,11 @@ void helper_fxrstor(CPUX86State *env,
target_ulong ptr, int data64)
             }
         }
     }
+
+    if (!data64) {
+        env->fpip &= 0xffffffff;
+        env->fpdp &= 0xffffffff;
+    }
 }

 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 8eb0145..9c4fd22 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -183,12 +183,12 @@ DEF_HELPER_1(frndint, void, env)
 DEF_HELPER_1(fscale, void, env)
 DEF_HELPER_1(fsin, void, env)
 DEF_HELPER_1(fcos, void, env)
-DEF_HELPER_3(fstenv, void, env, tl, int)
-DEF_HELPER_3(fldenv, void, env, tl, int)
-DEF_HELPER_3(fsave, void, env, tl, int)
-DEF_HELPER_3(frstor, void, env, tl, int)
-DEF_HELPER_3(fxsave, void, env, tl, int)
-DEF_HELPER_3(fxrstor, void, env, tl, int)
+DEF_HELPER_4(fstenv, void, env, tl, int, int)
+DEF_HELPER_4(fldenv, void, env, tl, int, int)
+DEF_HELPER_4(fsave, void, env, tl, int, int)
+DEF_HELPER_4(frstor, void, env, tl, int, int)
+DEF_HELPER_4(fxsave, void, env, tl, int, int)
+DEF_HELPER_4(fxrstor, void, env, tl, int, int)

 DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 16d2f6a..500f04f 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT16(env.fpop, X86CPU),
+        VMSTATE_UINT32(env.fpop, X86CPU),
         VMSTATE_UINT64(env.fpip, X86CPU),
         VMSTATE_UINT64(env.fpdp, X86CPU),
         VMSTATE_END_OF_LIST()
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6fcd824..8e490de 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -58,6 +58,9 @@
 #endif

 //#define MACRO_TEST   1
+#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)
+#define FP_EP_VALID 0x80000000
+#define FP_EP_INVALID 0

 /* global register indexes */
 static TCGv_ptr cpu_env;
@@ -65,6 +68,11 @@ static TCGv cpu_A0;
 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
+static TCGv_i32 cpu_fpop;
+static TCGv cpu_fpip;
+static TCGv cpu_fpdp;
+static TCGv_i32 cpu_fpds;
+static TCGv_i32 cpu_fpcs;
 /* local temps */
 static TCGv cpu_T[2];
 /* local register indexes (only used inside old micro ops) */
@@ -74,6 +82,9 @@ static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;

 static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
+static uint16_t gen_opc_fp_op[OPC_BUF_SIZE];
+static uint16_t gen_opc_fp_cs[OPC_BUF_SIZE];
+static target_ulong gen_opc_fp_ip[OPC_BUF_SIZE];

 #include "exec/gen-icount.h"

@@ -104,6 +115,10 @@ typedef struct DisasContext {
     int ss32;   /* 32 bit stack segment */
     CCOp cc_op;  /* current CC operation */
     bool cc_op_dirty;
+    uint16_t fp_op;
+    bool fp_ep_dirty;
+    target_ulong fp_ip;
+    uint16_t fp_cs;
     int addseg; /* non zero if either DS/ES/SS have a non zero base */
     int f_st;   /* currently unused */
     int vm86;   /* vm86 mode */
@@ -208,6 +223,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_CLR] = 0,
 };

+static inline bool instr_is_x87_nc(int modrm, int b)
+{
+    int op, mod, rm;
+    switch (b) {
+    case 0xd8 ... 0xdf:
+        /* floats */
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        if (mod != 3) {
+            /* memory */
+            switch (op) {
+            case 0x0c: /* fldenv */
+            case 0x0d: /* fldcw */
+            case 0x0e: /* fstenv, fnstenv */
+            case 0x0f: /* fstcw, fnstcw */
+            case 0x2c: /* frstor */
+            case 0x2e: /* fsave, fnsave */
+            case 0x2f: /* fstsw, fnstsw */
+                return false;
+            default:
+                return true;
+            }
+        } else {
+            /* register */
+            switch (op) {
+            case 0x0a:
+                return false; /* fnop, Illegal op */
+            case 0x0e: /* fdecstp, fincstp */
+            case 0x28: /* ffree */
+                return false;
+            case 0x1c:
+                switch (rm) {
+                case 1: /* feni */
+                    return true;
+                case 2: /* fclex, fnclex */
+                case 3: /* finit, fninit */
+                    return false;
+                case 4: /* fsetpm */
+                    return true;
+                default: /* Illegal op */
+                    return false;
+                }
+            case 0x3c:
+                return false; /* fstsw, fnstsw, Illegal op */
+            default:
+                return true;
+            }
+        }
+    /*case 0x9b: // fwait, wait
+        return false;*/
+    default:
+        return false;
+    }
+}
+
 static void set_cc_op(DisasContext *s, CCOp op)
 {
     int dead;
@@ -253,6 +324,23 @@ static void gen_update_cc_op(DisasContext *s)
     }
 }

+static void set_ep(DisasContext *s, int fp_op, int fp_ip, int fp_cs) {
+    s->fp_op = FP_EP_VALID | fp_op;
+    s->fp_ip = fp_ip;
+    s->fp_cs = fp_cs;
+    s->fp_ep_dirty = true;
+}
+
+static void gen_update_ep(DisasContext *s)
+{
+    if (s->fp_ep_dirty) {
+        tcg_gen_movi_i32(cpu_fpop, s->fp_op);
+        tcg_gen_movi_tl(cpu_fpip, s->fp_ip);
+        tcg_gen_movi_i32(cpu_fpcs, s->fp_cs);
+        s->fp_ep_dirty = false;
+    }
+}
+
 #ifdef TARGET_X86_64

 #define NB_OP_SIZES 4
@@ -666,6 +754,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp
ot, target_ulong cur_eip,
     state_saved = 0;
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(cur_eip);
         state_saved = 1;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -686,6 +775,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp
ot, target_ulong cur_eip,
     if(s->flags & HF_SVMI_MASK) {
         if (!state_saved) {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(cur_eip);
         }
         svm_flags |= (1 << (4 + ot));
@@ -1097,6 +1187,7 @@ static inline void gen_jcc1(DisasContext *s, int
b, int l1)
     CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);

     gen_update_cc_op(s);
+    gen_update_ep(s);
     if (cc.mask != -1) {
         tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
         cc.reg = cpu_T[0];
@@ -1580,14 +1671,14 @@ static void gen_rot_rm_T1(DisasContext *s,
TCGMemOp ot, int op1, int is_right)
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
-    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
+    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
     tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                         cpu_tmp2_i32, cpu_tmp3_i32);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);

-    /* The CC_OP value is no longer predictable.  */
+    /* The CC_OP value is no longer predictable.  */
     set_cc_op(s, CC_OP_DYNAMIC);
 }

@@ -1863,7 +1954,7 @@ static void gen_shifti(DisasContext *s1, int op,
TCGMemOp ot, int d, int c)
     }
 }

-static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm, int b)
 {
     target_long disp;
     int havesib;
@@ -1871,6 +1962,7 @@ static void gen_lea_modrm(CPUX86State *env,
DisasContext *s, int modrm)
     int index;
     int scale;
     int mod, rm, code, override, must_add_seg;
+    int curr_instr_is_x87_nc;
     TCGv sum;

     override = s->override;
@@ -1950,6 +2042,13 @@ static void gen_lea_modrm(CPUX86State *env,
DisasContext *s, int modrm)
             tcg_gen_addi_tl(cpu_A0, sum, disp);
         }

+        curr_instr_is_x87_nc = instr_is_x87_nc(modrm, b);
+        if (curr_instr_is_x87_nc) {
+            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+            if (s->aflag == MO_32) {
+                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
+            }
+        }
         if (must_add_seg) {
             if (override < 0) {
                 if (base == R_EBP || base == R_ESP) {
@@ -1961,6 +2060,12 @@ static void gen_lea_modrm(CPUX86State *env,
DisasContext *s, int modrm)

             tcg_gen_ld_tl(cpu_tmp0, cpu_env,
                           offsetof(CPUX86State, segs[override].base));
+
+            if (curr_instr_is_x87_nc) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
+
             if (CODE64(s)) {
                 if (s->aflag == MO_32) {
                     tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
@@ -1970,6 +2075,11 @@ static void gen_lea_modrm(CPUX86State *env,
DisasContext *s, int modrm)
             }

             tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+        } else {
+            if (curr_instr_is_x87_nc) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }

         if (s->aflag == MO_32) {
@@ -2039,8 +2149,22 @@ static void gen_lea_modrm(CPUX86State *env,
DisasContext *s, int modrm)
                     override = R_DS;
                 }
             }
+            if (instr_is_x87_nc(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
             gen_op_addl_A0_seg(s, override);
+        } else {
+            if (instr_is_x87_nc(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }
+#ifdef TARGET_X86_64
+        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
+#endif
         break;

     default:
@@ -2130,7 +2254,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
    OR_TMP0 */
 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
-                           TCGMemOp ot, int reg, int is_store)
+                           TCGMemOp ot, int reg, int is_store, int b)
 {
     int mod, rm;

@@ -2147,7 +2271,7 @@ static void gen_ldst_modrm(CPUX86State *env,
DisasContext *s, int modrm,
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         }
     } else {
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_v_reg(ot, cpu_T[0], reg);
@@ -2250,7 +2374,7 @@ static void gen_cmovcc1(CPUX86State *env,
DisasContext *s, TCGMemOp ot, int b,
 {
     CCPrepare cc;

-    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

     cc = gen_prepare_cc(s, b, cpu_T[1]);
     if (cc.mask != -1) {
@@ -2297,6 +2421,7 @@ static void gen_movl_seg_T0(DisasContext *s, int
seg_reg, target_ulong cur_eip)
     if (s->pe && !s->vm86) {
         /* XXX: optimize by finding processor state dynamically */
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(cur_eip);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
@@ -2326,6 +2451,7 @@ gen_svm_check_intercept_param(DisasContext *s,
target_ulong pc_start,
     if (likely(!(s->flags & HF_SVMI_MASK)))
         return;
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(pc_start - s->cs_base);
     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
                                          tcg_const_i64(param));
@@ -2513,6 +2639,7 @@ static void gen_enter(DisasContext *s, int
esp_addend, int level)
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
     s->is_jmp = DISAS_TB_JUMP;
@@ -2524,6 +2651,7 @@ static void gen_interrupt(DisasContext *s, int intno,
                           target_ulong cur_eip, target_ulong next_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
@@ -2533,6 +2661,7 @@ static void gen_interrupt(DisasContext *s, int intno,
 static void gen_debug(DisasContext *s, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_debug(cpu_env);
     s->is_jmp = DISAS_TB_JUMP;
@@ -2543,6 +2672,7 @@ static void gen_debug(DisasContext *s,
target_ulong cur_eip)
 static void gen_eob(DisasContext *s)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
         gen_helper_reset_inhibit_irq(cpu_env);
     }
@@ -2564,6 +2694,7 @@ static void gen_eob(DisasContext *s)
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     set_cc_op(s, CC_OP_DYNAMIC);
     if (s->jmp_opt) {
         gen_goto_tb(s, tb_num, eip);
@@ -3043,7 +3174,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x0e7: /* movntq */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             break;
         case 0x1e7: /* movntdq */
@@ -3051,20 +3182,20 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x12b: /* movntps */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x22b: /* movntss */
         case 0x32b: /* movntsd */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (b1 & 1) {
                 gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
@@ -3076,13 +3207,13 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
                 tcg_gen_st_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
@@ -3091,15 +3222,15 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
@@ -3107,7 +3238,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3124,7 +3255,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x16f: /* movdqa xmm, ea */
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3134,7 +3265,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3149,7 +3280,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3164,7 +3295,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x012: /* movlps */
         case 0x112: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3176,7 +3307,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x212: /* movsldup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3192,7 +3323,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3206,7 +3337,7 @@ static void gen_sse(CPUX86State *env,
DisasContext *s, int b,
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3218,7 +3349,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x216: /* movshdup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3256,34 +3387,34 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3295,7 +3426,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3310,7 +3441,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x17f: /* movdqa ea, xmm */
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3320,7 +3451,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
             } else {
@@ -3331,7 +3462,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3343,7 +3474,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x013: /* movlps */
         case 0x113: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3353,7 +3484,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x017: /* movhps */
         case 0x117: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3417,7 +3548,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12a: /* cvtpi2pd */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -3440,7 +3571,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x22a: /* cvtsi2ss */
         case 0x32a: /* cvtsi2sd */
             ot = mo_64_32(s->dflag);
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
@@ -3462,7 +3593,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12d: /* cvtpd2pi */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 gen_ldo_env_A0(s, op2_offset);
             } else {
@@ -3493,7 +3624,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x32d: /* cvtsd2si */
             ot = mo_64_32(s->dflag);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
                 } else {
@@ -3525,7 +3656,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0xc4: /* pinsrw */
         case 0x1c4:
             s->rip_offset = 1;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
@@ -3559,7 +3690,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3626,7 +3757,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     switch (b) {
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
@@ -3660,7 +3791,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -3701,7 +3832,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }

                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                                  cpu_T[0], tcg_const_i32(8 << ot));

@@ -3729,7 +3860,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     ot = MO_64;
                 }

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b & 1) == 0) {
                     tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
                                        s->mem_index, ot | MO_BE);
@@ -3747,7 +3878,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 gen_op_update1_cc();
@@ -3764,7 +3895,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 {
                     TCGv bound, zero;

-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
                     tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
@@ -3801,7 +3932,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
@@ -3828,7 +3959,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 switch (ot) {
                 default:
                     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3854,7 +3985,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3872,7 +4003,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3892,7 +4023,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     int end_op;

                     ot = mo_64_32(s->dflag);
-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                     /* Re-use the carry-out from a previous round.  */
                     TCGV_UNUSED(carry_in);
@@ -3971,7 +4102,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 if (ot == MO_64) {
                     tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
                 } else {
@@ -4003,7 +4134,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
@@ -4062,7 +4193,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 ot = mo_64_32(s->dflag);
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3)
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
@@ -4199,7 +4330,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldo_env_A0(s, op2_offset);
                 }
             } else {
@@ -4208,7 +4339,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -4242,7 +4373,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 b = cpu_ldub_code(env, s->pc++);
                 if (ot == MO_64) {
                     tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
@@ -4278,7 +4409,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             if (mod != 3) {
                 int sz = 4;

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);

                 switch (b) {
@@ -4326,7 +4457,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         } else {
             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -4404,6 +4535,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     int modrm, reg, rm, mod, op, opreg, val;
     target_ulong next_eip, tval;
     int rex_w, rex_r;
+    int fp_op, fp_ip, fp_cs;

     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(pc_start);
@@ -4595,7 +4727,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     opreg = OR_TMP0;
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
@@ -4616,7 +4748,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
@@ -4655,7 +4787,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                     s->rip_offset = 1;
                 else
                     s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = rm;
@@ -4698,7 +4830,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         if (mod != 3) {
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -4906,7 +5038,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             }
         }
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
@@ -4946,6 +5078,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         do_lcall:
             if (s->pe && !s->vm86) {
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4973,6 +5106,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         do_ljmp:
             if (s->pe && !s->vm86) {
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4998,7 +5132,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_v_reg(ot, cpu_T[1], reg);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
@@ -5073,7 +5207,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             s->rip_offset = insn_const_size(ot);
         else if (b == 0x6b)
             s->rip_offset = 1;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
             tcg_gen_movi_tl(cpu_T[1], val);
@@ -5130,7 +5264,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
             tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -5159,7 +5293,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_mov_tl(a0, cpu_A0);
                 gen_op_ld_v(s, ot, t0, a0);
                 rm = 0; /* avoid warning */
@@ -5207,16 +5341,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
-#endif
+#endif
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5266,7 +5400,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             s->popl_esp_hack = 0;
             gen_pop_update(s, ot);
         }
@@ -5352,7 +5486,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         reg = ((modrm >> 3) & 7) | rex_r;

         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
@@ -5361,7 +5495,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
         }
         val = insn_get(env, s, ot);
         tcg_gen_movi_tl(cpu_T[0], val);
@@ -5377,7 +5511,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         break;
     case 0x8e: /* mov seg, Gv */
@@ -5385,7 +5519,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
-        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace */
@@ -5408,7 +5542,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             goto illegal_op;
         gen_op_movl_T0_seg(reg);
         ot = mod == 3 ? dflag : MO_16;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
         break;

     case 0x1b6: /* movzbS Gv, Eb */
@@ -5450,7 +5584,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -5468,7 +5602,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         s->override = -1;
         val = s->addseg;
         s->addseg = 0;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         s->addseg = val;
         gen_op_mov_reg_v(ot, reg, cpu_A0);
         break;
@@ -5558,7 +5692,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
@@ -5593,7 +5727,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
@@ -5624,7 +5758,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 if (shift == 2) {
                     s->rip_offset = 1;
                 }
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = (modrm & 7) | REX_B(s);
@@ -5674,7 +5808,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             opreg = OR_TMP0;
         } else {
             opreg = rm;
@@ -5705,7 +5839,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -5832,7 +5966,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 0x0c: /* fldenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fldenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0d: /* fldcw mem */
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
@@ -5841,8 +5977,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 break;
             case 0x0e: /* fnstenv mem */
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fstenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
@@ -5863,12 +6002,17 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             case 0x2c: /* frstor mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_frstor(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2e: /* fnsave mem */
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag - 1));
+                gen_helper_fsave(cpu_env, cpu_A0,
+                                 tcg_const_i32(dflag == MO_32),
+                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
@@ -6209,6 +6353,12 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
                 goto illegal_op;
             }
         }
+        if (instr_is_x87_nc(modrm, b)) {
+            fp_op = ((b & 0x7) << 8) | (modrm & 0xff);
+            fp_ip = pc_start - s->cs_base;
+            fp_cs = env->segs[R_CS].selector;
+            set_ep(s, fp_op, fp_ip, fp_cs);
+        }
         break;
         /************************/
         /* string ops */
@@ -6393,6 +6543,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     do_lret:
         if (s->pe && !s->vm86) {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(val));
@@ -6430,6 +6581,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
             }
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
@@ -6527,7 +6679,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = cpu_ldub_code(env, s->pc++);
         gen_setcc1(s, b, cpu_T[0]);
-        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         if (!(s->cpuid_features & CPUID_CMOV)) {
@@ -6657,7 +6809,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
             s->rip_offset = 1;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -6688,7 +6840,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* specific case: we need to add a displacement */
             gen_exts(ot, cpu_T[1]);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
@@ -6764,7 +6916,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
         ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_extu(ot, cpu_T[0]);

         /* Note that lzcnt and tzcnt are in different extensions.  */
@@ -6884,6 +7036,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_fwait(cpu_env);
         }
@@ -6903,6 +7056,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (CODE64(s))
             goto illegal_op;
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
         break;
@@ -6967,7 +7121,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (mod == 3)
             goto illegal_op;
         gen_op_mov_v_reg(ot, cpu_T[0], reg);
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         if (ot == MO_16) {
@@ -7095,6 +7249,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
             gen_eob(s);
@@ -7104,6 +7259,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     case 0x105: /* syscall */
         /* XXX: is it usable in real mode ? */
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
         gen_eob(s);
@@ -7113,6 +7269,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
             /* condition codes are modified only in long mode */
@@ -7133,6 +7290,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->is_jmp = DISAS_TB_JUMP;
@@ -7149,7 +7307,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 2: /* lldt */
             if (!s->pe || s->vm86)
@@ -7158,7 +7316,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
@@ -7170,7 +7328,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,tr.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 3: /* ltr */
             if (!s->pe || s->vm86)
@@ -7179,7 +7337,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
@@ -7189,7 +7347,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         case 5: /* verw */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             gen_update_cc_op(s);
             if (op == 4) {
                 gen_helper_verr(cpu_env, cpu_T[0]);
@@ -7212,7 +7370,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             if (mod == 3)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
gdt.limit));
             gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
             gen_add_A0_im(s, 2);
@@ -7241,6 +7399,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                         s->cpl != 0)
                         goto illegal_op;
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
                     gen_eob(s);
@@ -7268,7 +7427,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
             } else { /* sidt */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State, idt.limit));
                 gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
                 gen_add_A0_im(s, 2);
@@ -7371,7 +7530,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             } else {
                 gen_svm_check_intercept(s, pc_start,
                                         op==2 ? SVM_EXIT_GDTR_WRITE :
SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
                 gen_add_A0_im(s, 2);
                 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
@@ -7394,14 +7553,14 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
 #else
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,cr[0]));
 #endif
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
             break;
         case 6: /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_helper_lmsw(cpu_env, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
@@ -7413,8 +7572,9 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 } else {
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_helper_invlpg(cpu_env, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
@@ -7446,6 +7606,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
                         goto illegal_op;
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     if (use_icount)
                         gen_io_start();
@@ -7493,7 +7654,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -7514,7 +7675,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, ot, t0, cpu_A0);
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, cpu_A0);
@@ -7556,7 +7717,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             ot = dflag != MO_16 ? MO_32 : MO_16;
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
@@ -7584,7 +7745,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
@@ -7624,6 +7785,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 4:
             case 8:
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 if (b & 2) {
                     gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -7696,7 +7858,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             goto illegal_op;
         reg = ((modrm >> 3) & 7) | rex_r;
         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0x1ae:
         modrm = cpu_ldub_code(env, s->pc++);
@@ -7704,6 +7866,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         op = (modrm >> 3) & 7;
         switch(op) {
         case 0: /* fxsave */
+            gen_update_ep(s);
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
                 (s->prefix & PREFIX_LOCK))
                 goto illegal_op;
@@ -7711,10 +7874,13 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxsave(cpu_env, cpu_A0,
+                              tcg_const_i32(dflag == MO_32),
+                              tcg_const_i32(dflag == MO_64));
             break;
         case 1: /* fxrstor */
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
@@ -7724,10 +7890,12 @@ static target_ulong disas_insn(CPUX86State
*env, DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag == MO_64));
+            gen_helper_fxrstor(cpu_env, cpu_A0,
+                               tcg_const_i32(dflag == MO_32),
+                               tcg_const_i32(dflag == MO_64));
             break;
         case 2: /* ldmxcsr */
         case 3: /* stmxcsr */
@@ -7738,7 +7906,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                 mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op == 2) {
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                     s->mem_index, MO_LEUL);
@@ -7763,7 +7931,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 /* clflush */
                 if (!(s->cpuid_features & CPUID_CLFLUSH))
                     goto illegal_op;
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
             }
             break;
         default:
@@ -7775,7 +7943,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
@@ -7783,6 +7951,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (!(s->flags & HF_SMM_MASK))
             goto illegal_op;
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(s->pc - s->cs_base);
         gen_helper_rsm(cpu_env);
         gen_eob(s);
@@ -7803,7 +7972,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             ot = mo_64_32(dflag);
         }

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);

@@ -7880,6 +8049,17 @@ void optimize_flags_init(void)
     cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, cc_src2),
                                      "cc_src2");

+    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
+                                      offsetof(CPUX86State, fpop), "fpop");
+    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
+                                     "fpip");
+    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
+                                     "fpdp");
+    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpds),
+                                     "fpds");
+    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State, fpcs),
+                                     "fpcs");
+
     for (i = 0; i < CPU_NB_REGS; ++i) {
         cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
                                          offsetof(CPUX86State, regs[i]),
@@ -7924,6 +8104,8 @@ static inline void
gen_intermediate_code_internal(X86CPU *cpu,
     dc->singlestep_enabled = cs->singlestep_enabled;
     dc->cc_op = CC_OP_DYNAMIC;
     dc->cc_op_dirty = false;
+    dc->fp_op = FP_EP_INVALID;
+    dc->fp_ep_dirty = false;
     dc->cs_base = cs_base;
     dc->tb = tb;
     dc->popl_esp_hack = 0;
@@ -7997,6 +8179,9 @@ static inline void
gen_intermediate_code_internal(X86CPU *cpu,
             }
             tcg_ctx.gen_opc_pc[lj] = pc_ptr;
             gen_opc_cc_op[lj] = dc->cc_op;
+            gen_opc_fp_op[lj] = dc->fp_op;
+            gen_opc_fp_ip[lj] = dc->fp_ip;
+            gen_opc_fp_cs[lj] = dc->fp_cs;
             tcg_ctx.gen_opc_instr_start[lj] = 1;
             tcg_ctx.gen_opc_icount[lj] = num_insns;
         }
@@ -8080,6 +8265,7 @@ void gen_intermediate_code_pc(CPUX86State *env,
TranslationBlock *tb)
 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int pc_pos)
 {
     int cc_op;
+    uint16_t fp_op;
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
         int i;
@@ -8099,4 +8285,10 @@ void restore_state_to_opc(CPUX86State *env,
TranslationBlock *tb, int pc_pos)
     cc_op = gen_opc_cc_op[pc_pos];
     if (cc_op != CC_OP_DYNAMIC)
         env->cc_op = cc_op;
+    fp_op = gen_opc_fp_op[pc_pos];
+    if (fp_op & FP_EP_VALID) {
+        tcg_gen_movi_i32(cpu_fpop, fp_op);
+        tcg_gen_movi_tl(cpu_fpip, gen_opc_fp_ip[pc_pos]);
+        tcg_gen_movi_i32(cpu_fpcs, gen_opc_fp_cs[pc_pos]);
+    }
 }

On Sun, Jun 22, 2014 at 9:17 PM, Jaume Martí <jaume.martif@gmail.com> wrote:
> Thanks Richard for your feedback. I am going to correct the patch and
> resubmit it.
>
> Best regards,
> Jaume
>
> On Sun, Jun 22, 2014 at 8:55 PM, Richard Henderson <rth@twiddle.net> wrote:
>> On 06/22/2014 07:55 AM, Jaume Martí wrote:
>>> -        cpu_x86_fsave(env, fpstate_addr, 1);
>>> -        fpstate->status = fpstate->sw;
>>> -        magic = 0xffff;
>>> +    cpu_x86_fsave(env, fpstate_addr);
>>> +    fpstate->status = fpstate->sw;
>>> +    magic = 0xffff;
>>
>> This patch needs to be split into format fixes and the actual change to be
>> reviewed.
>>
>>> -    /* KVM-only so far */
>>> -    uint16_t fpop;
>>> +    union {
>>> +        uint32_t tcg;
>>> +        uint16_t kvm;
>>> +    } fpop;
>>
>> This is highly questionable.
>>
>>>      .fields = (VMStateField[]) {
>>> -        VMSTATE_UINT16(env.fpop, X86CPU),
>>> +        VMSTATE_UINT16(env.fpop.kvm, X86CPU),
>>
>> You're breaking save/restore in tcg.  KVM is not required for migration.
>>
>>> +        if (non_control_x87_instr(modrm, b)) {
>>> +            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm & 0xff));
>>> +            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
>>> +            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
>>> +        }
>>
>> I strongly suspect you can implement this feature without having to add 3
>> (largely redundant) register writes to every x87 instruction executed.
>>
>> See how restore_state_to_opc works to compute the value of CC_OP during
>> translation.  You can do the same thing to recover these three values.
>>
>> You do have to sync these values before normal exits from the TB, but you only
>> have to do that once, not once for every insn executed.  See gen_update_cc_op.
>>
>>
>> r~

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG.
  2014-07-19  0:36       ` Jaume Martí
@ 2014-07-21 18:55         ` Jaume Martí
  0 siblings, 0 replies; 6+ messages in thread
From: Jaume Martí @ 2014-07-21 18:55 UTC (permalink / raw)
  To: Richard Henderson
  Cc: Peter Maydell, mtosatti, gleb, mst, riku.voipio, qemu-devel,
	quintela, vrozenfe, anthony, pbonzini, alex.bennee, afaerber

[-- Attachment #1: Type: text/plain, Size: 176775 bytes --]

Hello,

The patch in my previous email was corrupted by Gmail's limit of
78 characters per line when sending plain-text emails.
I am attaching a new patch. You can also pull the code from
https://github.com/jmartif/qemu.git
Please review and apply.

Best regards,
Jaume

Signed-off-by: Jaume Marti Farriol (jaume.martif@gmail.com)
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 1141054..73f8f6b 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -865,7 +865,7 @@ static void setup_sigcontext(struct target_sigcontext
*sc,
     __put_user(env->regs[R_ESP], &sc->esp_at_signal);
     __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);

-        cpu_x86_fsave(env, fpstate_addr, 1);
+        cpu_x86_fsave(env, fpstate_addr);
         fpstate->status = fpstate->sw;
         magic = 0xffff;
     __put_user(magic, &fpstate->magic);
@@ -1068,7 +1068,7 @@ restore_sigcontext(CPUX86State *env, struct
target_sigcontext *sc, int *peax)
                 if (!access_ok(VERIFY_READ, fpstate_addr,
                                sizeof(struct target_fpstate)))
                         goto badframe;
-                cpu_x86_frstor(env, fpstate_addr, 1);
+                cpu_x86_frstor(env, fpstate_addr);
  }

         *peax = tswapl(sc->eax);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index e634d83..4274ce3 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -819,10 +819,11 @@ typedef struct CPUX86State {
     uint16_t fpuc;
     uint8_t fptags[8];   /* 0 = valid, 1 = empty */
     FPReg fpregs[8];
-    /* KVM-only so far */
-    uint16_t fpop;
+    uint32_t fpop;
     uint64_t fpip;
     uint64_t fpdp;
+    uint32_t fpcs;
+    uint32_t fpds;

     /* emulator internal variables */
     float_status fp_status;
@@ -1067,8 +1068,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
 /* the following helpers are only usable in user mode simulation as
    they can trigger unexpected exceptions */
 void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
-void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
-void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
+void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);

 /* you can call this signal handler from your SIGBUS and SIGSEGV
    signal handlers to inform the virtual CPU of exceptions. non zero
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index 1b2900d..6886031 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -56,6 +56,8 @@
 #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
 #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)

+#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
+
 static inline void fpush(CPUX86State *env)
 {
     env->fpstt = (env->fpstt - 1) & 7;
@@ -604,6 +606,10 @@ void helper_fninit(CPUX86State *env)
     env->fptags[5] = 1;
     env->fptags[6] = 1;
     env->fptags[7] = 1;
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

 /* BCD ops */
@@ -961,13 +967,13 @@ void helper_fxam_ST0(CPUX86State *env)
     }
 }

-void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int fpus, fptag, exp, i;
+    int fptag, exp, i;
     uint64_t mant;
     CPU_LDoubleU tmp;

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 7; i >= 0; i--) {
         fptag <<= 2;
@@ -987,83 +993,150 @@ void helper_fstenv(CPUX86State *env, target_ulong
ptr, int data32)
             }
         }
     }
+
     if (data32) {
         /* 32 bit */
-        cpu_stl_data(env, ptr, env->fpuc);
-        cpu_stl_data(env, ptr + 4, fpus);
-        cpu_stl_data(env, ptr + 8, fptag);
-        cpu_stl_data(env, ptr + 12, 0); /* fpip */
-        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
-        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
-        cpu_stl_data(env, ptr + 24, 0); /* fpos */
+        cpu_stw_data(env, ptr, env->fpuc);
+        cpu_stw_data(env, ptr + 4, FPUS(env));
+        cpu_stw_data(env, ptr + 8, fptag);
+        if (protected_mode) {
+            cpu_stl_data(env, ptr + 12, env->fpip);
+            cpu_stl_data(env, ptr + 16,
+                        ((env->fpop & 0x7ff) << 16) | (env->fpcs &
0xffff));
+            cpu_stl_data(env, ptr + 20, env->fpdp);
+            cpu_stl_data(env, ptr + 24, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
+            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) <<
12) |
+                        (env->fpop & 0x7ff))); /* fpip[31..16], fpop */
+            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
+            cpu_stl_data(env, ptr + 24,
+                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
+        }
     } else {
         /* 16 bit */
         cpu_stw_data(env, ptr, env->fpuc);
-        cpu_stw_data(env, ptr + 2, fpus);
+        cpu_stw_data(env, ptr + 2, FPUS(env));
         cpu_stw_data(env, ptr + 4, fptag);
-        cpu_stw_data(env, ptr + 6, 0);
-        cpu_stw_data(env, ptr + 8, 0);
-        cpu_stw_data(env, ptr + 10, 0);
-        cpu_stw_data(env, ptr + 12, 0);
+        if (protected_mode) {
+            cpu_stw_data(env, ptr + 6, env->fpip);
+            cpu_stw_data(env, ptr + 8, env->fpcs);
+            cpu_stw_data(env, ptr + 10, env->fpdp);
+            cpu_stw_data(env, ptr + 12, env->fpds);
+        } else {
+            /* Real mode  */
+            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
+            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
+                        (env->fpop & 0x7ff)); /* fpip[19..16], fpop */
+            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
+            cpu_stw_data(env, ptr + 12,
+                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
+        }
     }
+
+    env->fpip = 0;
+    env->fpcs = 0;
+    env->fpdp = 0;
+    env->fpds = 0;
 }

-void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
-    int i, fpus, fptag;
+    int tmp, i, fpus, fptag;

     if (data32) {
+        /* 32 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 4);
         fptag = cpu_lduw_data(env, ptr + 8);
+        if (protected_mode) {
+            env->fpip = cpu_ldl_data(env, ptr + 12);
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpcs = tmp & 0xffff;
+            env->fpop = tmp >> 16;
+            env->fpdp = cpu_ldl_data(env, ptr + 20);
+            env->fpds = cpu_lduw_data(env, ptr + 24);
+        } else {
+            /* Real mode */
+            tmp = cpu_ldl_data(env, ptr + 16);
+            env->fpip = ((tmp & 0xffff000) << 4) |
+                        cpu_lduw_data(env, ptr + 12);
+            env->fpop = tmp & 0x7ff;
+            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
+                        cpu_lduw_data(env, ptr + 20);
+        }
     } else {
+        /* 16 bit */
         env->fpuc = cpu_lduw_data(env, ptr);
         fpus = cpu_lduw_data(env, ptr + 2);
         fptag = cpu_lduw_data(env, ptr + 4);
+        if (protected_mode) {
+            /* Protected mode  */
+            env->fpip = cpu_lduw_data(env, ptr + 6);
+            env->fpcs = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 10);
+            env->fpds = cpu_lduw_data(env, ptr + 12);
+        } else {
+            /* Real mode  */
+            tmp = cpu_lduw_data(env, ptr + 8);
+            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr +
6);
+            env->fpop = tmp & 0x7ff;
+            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
+                        cpu_lduw_data(env, ptr + 10);
+        }
     }
+
     env->fpstt = (fpus >> 11) & 7;
     env->fpus = fpus & ~0x3800;
     for (i = 0; i < 8; i++) {
         env->fptags[i] = ((fptag & 3) == 3);
         fptag >>= 2;
     }
+
+    env->fpip &= 0xffffffff;
+    env->fpdp &= 0xffffffff;
+    if (!protected_mode) {
+        env->fpcs = 0;
+        env->fpds = 0;
+    }
 }

-void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
+void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
+                  int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fstenv(env, ptr, data32);
+    helper_fstenv(env, ptr, data32, protected_mode);

-    ptr += (14 << data32);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }
     for (i = 0; i < 8; i++) {
         tmp = ST(i);
         helper_fstt(env, tmp, ptr);
         ptr += 10;
     }

-    /* fninit */
-    env->fpus = 0;
-    env->fpstt = 0;
-    env->fpuc = 0x37f;
-    env->fptags[0] = 1;
-    env->fptags[1] = 1;
-    env->fptags[2] = 1;
-    env->fptags[3] = 1;
-    env->fptags[4] = 1;
-    env->fptags[5] = 1;
-    env->fptags[6] = 1;
-    env->fptags[7] = 1;
+    helper_fninit(env);
 }

-void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
+                   int protected_mode)
 {
     floatx80 tmp;
     int i;

-    helper_fldenv(env, ptr, data32);
-    ptr += (14 << data32);
+    helper_fldenv(env, ptr, data32, protected_mode);
+    if (data32) {
+        ptr += 28;
+    } else {
+        ptr += 14;
+    }

     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, ptr);
@@ -1072,21 +1145,22 @@ void helper_frstor(CPUX86State *env, target_ulong
ptr, int data32)
     }
 }

-#if defined(CONFIG_USER_ONLY)
-void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
+#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS
== 32
+
+void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
 {
-    helper_fsave(env, ptr, data32);
+    helper_fsave(env, ptr, 1, 1);
 }

-void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
 {
-    helper_frstor(env, ptr, data32);
+    helper_frstor(env, ptr, 1, 1);
 }
 #endif

-void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int
data64)
 {
-    int fpus, fptag, i, nb_xmm_regs;
+    int i, nb_xmm_regs, fptag;
     floatx80 tmp;
     target_ulong addr;

@@ -1095,25 +1169,36 @@ void helper_fxsave(CPUX86State *env, target_ulong
ptr, int data64)
         raise_exception(env, EXCP0D_GPF);
     }

-    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
     fptag = 0;
     for (i = 0; i < 8; i++) {
         fptag |= (env->fptags[i] << i);
     }
+    fptag ^= 0xff;
+
     cpu_stw_data(env, ptr, env->fpuc);
-    cpu_stw_data(env, ptr + 2, fpus);
-    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
+    cpu_stw_data(env, ptr + 2, FPUS(env));
+    cpu_stw_data(env, ptr + 4, fptag & 0xff);
+    cpu_stw_data(env, ptr + 6, env->fpop);
+
 #ifdef TARGET_X86_64
     if (data64) {
-        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
-        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
+        /* 64 bit */
+        cpu_stq_data(env, ptr + 8, env->fpip);
+        cpu_stq_data(env, ptr + 16, env->fpdp);
     } else
 #endif
     {
-        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
-        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
-        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
-        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
+        if (data32) {
+            /* 32 bit */
+            cpu_stl_data(env, ptr + 8, env->fpip);
+            cpu_stl_data(env, ptr + 16, env->fpdp);
+        } else {
+            /* 16 bit */
+            cpu_stw_data(env, ptr + 8, env->fpip);
+            cpu_stw_data(env, ptr + 16, env->fpdp);
+        }
+        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
+        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
     }

     addr = ptr + 0x20;
@@ -1146,7 +1231,7 @@ void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
     }
 }

-void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
+void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int data64)
 {
     int i, fpus, fptag, nb_xmm_regs;
     floatx80 tmp;
@@ -1167,6 +1252,30 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
         env->fptags[i] = ((fptag >> i) & 1);
     }

+    env->fpop = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
+
+#ifdef TARGET_X86_64
+    if (data64) {
+        /* 64 bit */
+        env->fpip = cpu_ldq_data(env, ptr + 8);
+        env->fpdp = cpu_ldq_data(env, ptr + 16);
+    } else
+#endif
+    {
+        if (data32) {
+            /* 32 bit */
+            env->fpip = cpu_ldl_data(env, ptr + 8);
+            env->fpdp = cpu_ldl_data(env, ptr + 16);
+        } else {
+            /* 16 bit */
+            env->fpip = cpu_lduw_data(env, ptr + 8);
+            env->fpdp = cpu_lduw_data(env, ptr + 16);
+        }
+
+        env->fpcs = cpu_lduw_data(env, ptr + 12);
+        env->fpds = cpu_lduw_data(env, ptr + 20);
+    }
+
     addr = ptr + 0x20;
     for (i = 0; i < 8; i++) {
         tmp = helper_fldt(env, addr);
@@ -1195,6 +1304,11 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
             }
         }
     }
+
+    if (!data64) {
+        env->fpip &= 0xffffffff;
+        env->fpdp &= 0xffffffff;
+    }
 }

 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
diff --git a/target-i386/helper.h b/target-i386/helper.h
index 8eb0145..9c4fd22 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -183,12 +183,12 @@ DEF_HELPER_1(frndint, void, env)
 DEF_HELPER_1(fscale, void, env)
 DEF_HELPER_1(fsin, void, env)
 DEF_HELPER_1(fcos, void, env)
-DEF_HELPER_3(fstenv, void, env, tl, int)
-DEF_HELPER_3(fldenv, void, env, tl, int)
-DEF_HELPER_3(fsave, void, env, tl, int)
-DEF_HELPER_3(frstor, void, env, tl, int)
-DEF_HELPER_3(fxsave, void, env, tl, int)
-DEF_HELPER_3(fxrstor, void, env, tl, int)
+DEF_HELPER_4(fstenv, void, env, tl, int, int)
+DEF_HELPER_4(fldenv, void, env, tl, int, int)
+DEF_HELPER_4(fsave, void, env, tl, int, int)
+DEF_HELPER_4(frstor, void, env, tl, int, int)
+DEF_HELPER_4(fxsave, void, env, tl, int, int)
+DEF_HELPER_4(fxrstor, void, env, tl, int, int)

 DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 16d2f6a..500f04f 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT16(env.fpop, X86CPU),
+        VMSTATE_UINT32(env.fpop, X86CPU),
         VMSTATE_UINT64(env.fpip, X86CPU),
         VMSTATE_UINT64(env.fpdp, X86CPU),
         VMSTATE_END_OF_LIST()
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 6fcd824..8e490de 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -58,6 +58,9 @@
 #endif

 //#define MACRO_TEST   1
+#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)
+#define FP_EP_VALID 0x80000000
+#define FP_EP_INVALID 0

 /* global register indexes */
 static TCGv_ptr cpu_env;
@@ -65,6 +68,11 @@ static TCGv cpu_A0;
 static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
+static TCGv_i32 cpu_fpop;
+static TCGv cpu_fpip;
+static TCGv cpu_fpdp;
+static TCGv_i32 cpu_fpds;
+static TCGv_i32 cpu_fpcs;
 /* local temps */
 static TCGv cpu_T[2];
 /* local register indexes (only used inside old micro ops) */
@@ -74,6 +82,9 @@ static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
 static TCGv_i64 cpu_tmp1_i64;

 static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
+static uint16_t gen_opc_fp_op[OPC_BUF_SIZE];
+static uint16_t gen_opc_fp_cs[OPC_BUF_SIZE];
+static target_ulong gen_opc_fp_ip[OPC_BUF_SIZE];

 #include "exec/gen-icount.h"

@@ -104,6 +115,10 @@ typedef struct DisasContext {
     int ss32;   /* 32 bit stack segment */
     CCOp cc_op;  /* current CC operation */
     bool cc_op_dirty;
+    uint16_t fp_op;
+    bool fp_ep_dirty;
+    target_ulong fp_ip;
+    uint16_t fp_cs;
     int addseg; /* non zero if either DS/ES/SS have a non zero base */
     int f_st;   /* currently unused */
     int vm86;   /* vm86 mode */
@@ -208,6 +223,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
     [CC_OP_CLR] = 0,
 };

+static inline bool instr_is_x87_nc(int modrm, int b)
+{
+    int op, mod, rm;
+    switch (b) {
+    case 0xd8 ... 0xdf:
+        /* floats */
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        if (mod != 3) {
+            /* memory */
+            switch (op) {
+            case 0x0c: /* fldenv */
+            case 0x0d: /* fldcw */
+            case 0x0e: /* fstenv, fnstenv */
+            case 0x0f: /* fstcw, fnstcw */
+            case 0x2c: /* frstor */
+            case 0x2e: /* fsave, fnsave */
+            case 0x2f: /* fstsw, fnstsw */
+                return false;
+            default:
+                return true;
+            }
+        } else {
+            /* register */
+            switch (op) {
+            case 0x0a:
+                return false; /* fnop, Illegal op */
+            case 0x0e: /* fdecstp, fincstp */
+            case 0x28: /* ffree */
+                return false;
+            case 0x1c:
+                switch (rm) {
+                case 1: /* feni */
+                    return true;
+                case 2: /* fclex, fnclex */
+                case 3: /* finit, fninit */
+                    return false;
+                case 4: /* fsetpm */
+                    return true;
+                default: /* Illegal op */
+                    return false;
+                }
+            case 0x3c:
+                return false; /* fstsw, fnstsw, Illegal op */
+            default:
+                return true;
+            }
+        }
+    /*case 0x9b: // fwait, wait
+        return false;*/
+    default:
+        return false;
+    }
+}
+
 static void set_cc_op(DisasContext *s, CCOp op)
 {
     int dead;
@@ -253,6 +324,23 @@ static void gen_update_cc_op(DisasContext *s)
     }
 }

+static void set_ep(DisasContext *s, int fp_op, int fp_ip, int fp_cs) {
+    s->fp_op = FP_EP_VALID | fp_op;
+    s->fp_ip = fp_ip;
+    s->fp_cs = fp_cs;
+    s->fp_ep_dirty = true;
+}
+
+static void gen_update_ep(DisasContext *s)
+{
+    if (s->fp_ep_dirty) {
+        tcg_gen_movi_i32(cpu_fpop, s->fp_op);
+        tcg_gen_movi_tl(cpu_fpip, s->fp_ip);
+        tcg_gen_movi_i32(cpu_fpcs, s->fp_cs);
+        s->fp_ep_dirty = false;
+    }
+}
+
 #ifdef TARGET_X86_64

 #define NB_OP_SIZES 4
@@ -666,6 +754,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
     state_saved = 0;
     if (s->pe && (s->cpl > s->iopl || s->vm86)) {
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(cur_eip);
         state_saved = 1;
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -686,6 +775,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip,
     if(s->flags & HF_SVMI_MASK) {
         if (!state_saved) {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(cur_eip);
         }
         svm_flags |= (1 << (4 + ot));
@@ -1097,6 +1187,7 @@ static inline void gen_jcc1(DisasContext *s, int b, int l1)
     CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);

     gen_update_cc_op(s);
+    gen_update_ep(s);
     if (cc.mask != -1) {
         tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
         cc.reg = cpu_T[0];
@@ -1580,14 +1671,14 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right)
     t0 = tcg_const_i32(0);
     t1 = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
-    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
+    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
     tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
     tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
                         cpu_tmp2_i32, cpu_tmp3_i32);
     tcg_temp_free_i32(t0);
     tcg_temp_free_i32(t1);

-    /* The CC_OP value is no longer predictable.  */
+    /* The CC_OP value is no longer predictable.  */
     set_cc_op(s, CC_OP_DYNAMIC);
 }

@@ -1863,7 +1954,7 @@ static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c)
     }
 }

-static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
+static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm, int b)
 {
     target_long disp;
     int havesib;
@@ -1871,6 +1962,7 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
     int index;
     int scale;
     int mod, rm, code, override, must_add_seg;
+    int curr_instr_is_x87_nc;
     TCGv sum;

     override = s->override;
@@ -1950,6 +2042,13 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
             tcg_gen_addi_tl(cpu_A0, sum, disp);
         }

+        curr_instr_is_x87_nc = instr_is_x87_nc(modrm, b);
+        if (curr_instr_is_x87_nc) {
+            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+            if (s->aflag == MO_32) {
+                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
+            }
+        }
         if (must_add_seg) {
             if (override < 0) {
                 if (base == R_EBP || base == R_ESP) {
@@ -1961,6 +2060,12 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)

             tcg_gen_ld_tl(cpu_tmp0, cpu_env,
                           offsetof(CPUX86State, segs[override].base));
+
+            if (curr_instr_is_x87_nc) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
+
             if (CODE64(s)) {
                 if (s->aflag == MO_32) {
                     tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
@@ -1970,6 +2075,11 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
             }

             tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
+        } else {
+            if (curr_instr_is_x87_nc) {
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }

         if (s->aflag == MO_32) {
@@ -2039,8 +2149,22 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
                     override = R_DS;
                 }
             }
+            if (instr_is_x87_nc(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[override].selector));
+            }
             gen_op_addl_A0_seg(s, override);
+        } else {
+            if (instr_is_x87_nc(modrm, b)) {
+                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
+                tcg_gen_ld_i32(cpu_fpds, cpu_env,
+                              offsetof(CPUX86State, segs[R_DS].selector));
+            }
         }
+#ifdef TARGET_X86_64
+        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
+#endif
         break;

     default:
@@ -2130,7 +2254,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
 /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
    OR_TMP0 */
 static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
-                           TCGMemOp ot, int reg, int is_store)
+                           TCGMemOp ot, int reg, int is_store, int b)
 {
     int mod, rm;

@@ -2147,7 +2271,7 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         }
     } else {
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         if (is_store) {
             if (reg != OR_TMP0)
                 gen_op_mov_v_reg(ot, cpu_T[0], reg);
@@ -2250,7 +2374,7 @@ static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b,
 {
     CCPrepare cc;

-    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

     cc = gen_prepare_cc(s, b, cpu_T[1]);
     if (cc.mask != -1) {
@@ -2297,6 +2421,7 @@ static void gen_movl_seg_T0(DisasContext *s, int seg_reg, target_ulong cur_eip)
     if (s->pe && !s->vm86) {
         /* XXX: optimize by finding processor state dynamically */
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(cur_eip);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
@@ -2326,6 +2451,7 @@ gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start,
     if (likely(!(s->flags & HF_SVMI_MASK)))
         return;
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(pc_start - s->cs_base);
     gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
                                          tcg_const_i64(param));
@@ -2513,6 +2639,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level)
 static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
     s->is_jmp = DISAS_TB_JUMP;
@@ -2524,6 +2651,7 @@ static void gen_interrupt(DisasContext *s, int intno,
                           target_ulong cur_eip, target_ulong next_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
                                tcg_const_i32(next_eip - cur_eip));
@@ -2533,6 +2661,7 @@ static void gen_interrupt(DisasContext *s, int intno,
 static void gen_debug(DisasContext *s, target_ulong cur_eip)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     gen_jmp_im(cur_eip);
     gen_helper_debug(cpu_env);
     s->is_jmp = DISAS_TB_JUMP;
@@ -2543,6 +2672,7 @@ static void gen_debug(DisasContext *s, target_ulong cur_eip)
 static void gen_eob(DisasContext *s)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
         gen_helper_reset_inhibit_irq(cpu_env);
     }
@@ -2564,6 +2694,7 @@ static void gen_eob(DisasContext *s)
 static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
 {
     gen_update_cc_op(s);
+    gen_update_ep(s);
     set_cc_op(s, CC_OP_DYNAMIC);
     if (s->jmp_opt) {
         gen_goto_tb(s, tb_num, eip);
@@ -3043,7 +3174,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x0e7: /* movntq */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             break;
         case 0x1e7: /* movntdq */
@@ -3051,20 +3182,20 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12b: /* movntps */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x3f0: /* lddqu */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             break;
         case 0x22b: /* movntss */
         case 0x32b: /* movntsd */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (b1 & 1) {
                 gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
@@ -3076,13 +3207,13 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x6e: /* movd mm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
                 tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,fpregs[reg].mmx));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
@@ -3091,15 +3222,15 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16e: /* movd xmm, ea */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
             } else
 #endif
             {
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
-                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
+                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
                                  offsetof(CPUX86State,xmm_regs[reg]));
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
@@ -3107,7 +3238,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x6f: /* movq mm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3124,7 +3255,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x16f: /* movdqa xmm, ea */
         case 0x26f: /* movdqu xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3134,7 +3265,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x210: /* movss xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
                 tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3149,7 +3280,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x310: /* movsd xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
                 tcg_gen_movi_tl(cpu_T[0], 0);
@@ -3164,7 +3295,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x012: /* movlps */
         case 0x112: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3176,7 +3307,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x212: /* movsldup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3192,7 +3323,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x312: /* movddup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3206,7 +3337,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x016: /* movhps */
         case 0x116: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3218,7 +3349,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x216: /* movshdup */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3256,34 +3387,34 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x7e: /* movd ea, mm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,
                                offsetof(CPUX86State,fpregs[reg].mmx));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,

offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x17e: /* movd ea, xmm */
 #ifdef TARGET_X86_64
             if (s->dflag == MO_64) {
-                tcg_gen_ld_i64(cpu_T[0], cpu_env,
+                tcg_gen_ld_i64(cpu_T[0], cpu_env,

offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
-                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
             } else
 #endif
             {
-                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
+                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,

offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
-                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
+                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
             }
             break;
         case 0x27e: /* movq xmm, ea */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_ldq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3295,7 +3426,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x7f: /* movq ea, mm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
             } else {
                 rm = (modrm & 7);
@@ -3310,7 +3441,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x17f: /* movdqa ea, xmm */
         case 0x27f: /* movdqu ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
             } else {
                 rm = (modrm & 7) | REX_B(s);
@@ -3320,7 +3451,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x211: /* movss ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
                 gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
             } else {
@@ -3331,7 +3462,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x311: /* movsd ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3343,7 +3474,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x013: /* movlps */
         case 0x113: /* movlpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3353,7 +3484,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x017: /* movhps */
         case 0x117: /* movhpd */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(1)));
             } else {
@@ -3417,7 +3548,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12a: /* cvtpi2pd */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -3440,7 +3571,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x22a: /* cvtsi2ss */
         case 0x32a: /* cvtsi2sd */
             ot = mo_64_32(s->dflag);
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
             op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
             tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
             if (ot == MO_32) {
@@ -3462,7 +3593,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x12d: /* cvtpd2pi */
             gen_helper_enter_mmx(cpu_env);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);
                 gen_ldo_env_A0(s, op2_offset);
             } else {
@@ -3493,7 +3624,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0x32d: /* cvtsd2si */
             ot = mo_64_32(s->dflag);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b >> 8) & 1) {
                     gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
                 } else {
@@ -3525,7 +3656,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
         case 0xc4: /* pinsrw */
         case 0x1c4:
             s->rip_offset = 1;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             val = cpu_ldub_code(env, s->pc++);
             if (b1) {
                 val &= 7;
@@ -3559,7 +3690,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
             break;
         case 0x1d6: /* movq ea, xmm */
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_stq_env_A0(s, offsetof(CPUX86State,
                                            xmm_regs[reg].XMM_Q(0)));
             } else {
@@ -3626,7 +3757,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     switch (b) {
                     case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
                     case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
@@ -3660,7 +3791,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -3701,7 +3832,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }

                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
                                  cpu_T[0], tcg_const_i32(8 << ot));

@@ -3729,7 +3860,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     ot = MO_64;
                 }

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 if ((b & 1) == 0) {
                     tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
                                        s->mem_index, ot | MO_BE);
@@ -3747,7 +3878,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
                 gen_op_mov_reg_v(ot, reg, cpu_T[0]);
                 gen_op_update1_cc();
@@ -3764,7 +3895,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 {
                     TCGv bound, zero;

-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                     /* Extract START, and shift the operand.
                        Shifts larger than operand size get zeros.  */
                     tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
@@ -3801,7 +3932,7 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
                 {
                     TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
@@ -3828,7 +3959,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 switch (ot) {
                 default:
                     tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
@@ -3854,7 +3985,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3872,7 +4003,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 /* Note that by zero-extending the mask operand, we
                    automatically handle zero-extending the result.  */
                 if (ot == MO_64) {
@@ -3892,7 +4023,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     int end_op;

                     ot = mo_64_32(s->dflag);
-                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                     /* Re-use the carry-out from a previous round.  */
                     TCGV_UNUSED(carry_in);
@@ -3971,7 +4102,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 if (ot == MO_64) {
                     tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
                 } else {
@@ -4003,7 +4134,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);

                 switch (reg & 7) {
                 case 1: /* blsr By,Ey */
@@ -4062,7 +4193,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                 ot = mo_64_32(s->dflag);
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3)
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                 reg = ((modrm >> 3) & 7) | rex_r;
                 val = cpu_ldub_code(env, s->pc++);
                 switch (b) {
@@ -4199,7 +4330,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     op2_offset = offsetof(CPUX86State,xmm_regs[rm |
REX_B(s)]);
                 } else {
                     op2_offset = offsetof(CPUX86State,xmm_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldo_env_A0(s, op2_offset);
                 }
             } else {
@@ -4208,7 +4339,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
                 } else {
                     op2_offset = offsetof(CPUX86State,mmx_t0);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_ldq_env_A0(s, op2_offset);
                 }
             }
@@ -4242,7 +4373,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
                     goto illegal_op;
                 }
                 ot = mo_64_32(s->dflag);
-                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
                 b = cpu_ldub_code(env, s->pc++);
                 if (ot == MO_64) {
                     tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
@@ -4278,7 +4409,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
             if (mod != 3) {
                 int sz = 4;

-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,xmm_t0);

                 switch (b) {
@@ -4326,7 +4457,7 @@ static void gen_sse(CPUX86State *env, DisasContext
*s, int b,
         } else {
             op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 op2_offset = offsetof(CPUX86State,mmx_t0);
                 gen_ldq_env_A0(s, op2_offset);
             } else {
@@ -4404,6 +4535,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     int modrm, reg, rm, mod, op, opreg, val;
     target_ulong next_eip, tval;
     int rex_w, rex_r;
+    int fp_op, fp_ip, fp_cs;

     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(pc_start);
@@ -4595,7 +4727,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 mod = (modrm >> 6) & 3;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     opreg = OR_TMP0;
                 } else if (op == OP_XORL && rm == reg) {
                 xor_zero:
@@ -4616,7 +4748,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 reg = ((modrm >> 3) & 7) | rex_r;
                 rm = (modrm & 7) | REX_B(s);
                 if (mod != 3) {
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
                 } else if (op == OP_XORL && rm == reg) {
                     goto xor_zero;
@@ -4655,7 +4787,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     s->rip_offset = 1;
                 else
                     s->rip_offset = insn_const_size(ot);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = rm;
@@ -4698,7 +4830,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (mod != 3) {
             if (op == 0)
                 s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -4906,7 +5038,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             }
         }
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op >= 2 && op != 3 && op != 5)
                 gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
@@ -4946,6 +5078,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         do_lcall:
             if (s->pe && !s->vm86) {
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4973,6 +5106,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         do_ljmp:
             if (s->pe && !s->vm86) {
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
@@ -4998,7 +5132,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_v_reg(ot, cpu_T[1], reg);
         gen_op_testl_T0_T1_cc();
         set_cc_op(s, CC_OP_LOGICB + ot);
@@ -5073,7 +5207,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             s->rip_offset = insn_const_size(ot);
         else if (b == 0x6b)
             s->rip_offset = 1;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         if (b == 0x69) {
             val = insn_get(env, s, ot);
             tcg_gen_movi_tl(cpu_T[1], val);
@@ -5130,7 +5264,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
             tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
@@ -5159,7 +5293,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 rm = (modrm & 7) | REX_B(s);
                 gen_op_mov_v_reg(ot, t0, rm);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_mov_tl(a0, cpu_A0);
                 gen_op_ld_v(s, ot, t0, a0);
                 rm = 0; /* avoid warning */
@@ -5207,16 +5341,16 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg16b(cpu_env, cpu_A0);
         } else
-#endif
+#endif
         {
             if (!(s->cpuid_features & CPUID_CX8))
                 goto illegal_op;
             gen_jmp_im(pc_start - s->cs_base);
             gen_update_cc_op(s);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_helper_cmpxchg8b(cpu_env, cpu_A0);
         }
         set_cc_op(s, CC_OP_EFLAGS);
@@ -5266,7 +5400,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         } else {
             /* NOTE: order is important too for MMU exceptions */
             s->popl_esp_hack = 1 << ot;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             s->popl_esp_hack = 0;
             gen_pop_update(s, ot);
         }
@@ -5352,7 +5486,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         reg = ((modrm >> 3) & 7) | rex_r;

         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0xc6:
     case 0xc7: /* mov Ev, Iv */
@@ -5361,7 +5495,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod != 3) {
             s->rip_offset = insn_const_size(ot);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
         }
         val = insn_get(env, s, ot);
         tcg_gen_movi_tl(cpu_T[0], val);
@@ -5377,7 +5511,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);
         break;
     case 0x8e: /* mov seg, Gv */
@@ -5385,7 +5519,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
-        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
         gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
         if (reg == R_SS) {
             /* if reg == SS, inhibit interrupts/trace */
@@ -5408,7 +5542,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             goto illegal_op;
         gen_op_movl_T0_seg(reg);
         ot = mod == 3 ? dflag : MO_16;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
         break;

     case 0x1b6: /* movzbS Gv, Eb */
@@ -5450,7 +5584,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -5468,7 +5602,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         s->override = -1;
         val = s->addseg;
         s->addseg = 0;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         s->addseg = val;
         gen_op_mov_reg_v(ot, reg, cpu_A0);
         break;
@@ -5558,7 +5692,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_op_mov_reg_v(ot, rm, cpu_T[0]);
             gen_op_mov_reg_v(ot, reg, cpu_T[1]);
         } else {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_mov_v_reg(ot, cpu_T[0], reg);
             /* for xchg, lock is implicit */
             if (!(prefixes & PREFIX_LOCK))
@@ -5593,7 +5727,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
         gen_add_A0_im(s, 1 << ot);
         /* load the segment first to handle exceptions properly */
@@ -5624,7 +5758,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 if (shift == 2) {
                     s->rip_offset = 1;
                 }
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 opreg = OR_TMP0;
             } else {
                 opreg = (modrm & 7) | REX_B(s);
@@ -5674,7 +5808,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         reg = ((modrm >> 3) & 7) | rex_r;
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             opreg = OR_TMP0;
         } else {
             opreg = rm;
@@ -5705,7 +5839,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
         if (mod != 3) {
             /* memory op */
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             switch(op) {
             case 0x00 ... 0x07: /* fxxxs */
             case 0x10 ... 0x17: /* fixxxl */
@@ -5832,7 +5966,9 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 0x0c: /* fldenv mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag -
1));
+                gen_helper_fldenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0d: /* fldcw mem */
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
@@ -5841,8 +5977,11 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 break;
             case 0x0e: /* fnstenv mem */
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag -
1));
+                gen_helper_fstenv(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x0f: /* fnstcw mem */
                 gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
@@ -5863,12 +6002,17 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 0x2c: /* frstor mem */
                 gen_update_cc_op(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag -
1));
+                gen_helper_frstor(cpu_env, cpu_A0,
+                                  tcg_const_i32(dflag == MO_32),
+                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2e: /* fnsave mem */
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
-                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag -
1));
+                gen_helper_fsave(cpu_env, cpu_A0,
+                                 tcg_const_i32(dflag == MO_32),
+                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
                 break;
             case 0x2f: /* fnstsw mem */
                 gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
@@ -6209,6 +6353,12 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 goto illegal_op;
             }
         }
+        if (instr_is_x87_nc(modrm, b)) {
+            fp_op = ((b & 0x7) << 8) | (modrm & 0xff);
+            fp_ip = pc_start - s->cs_base;
+            fp_cs = env->segs[R_CS].selector;
+            set_ep(s, fp_op, fp_ip, fp_cs);
+        }
         break;
         /************************/
         /* string ops */
@@ -6393,6 +6543,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     do_lret:
         if (s->pe && !s->vm86) {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(val));
@@ -6430,6 +6581,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             }
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
                                       tcg_const_i32(s->pc - s->cs_base));
@@ -6527,7 +6679,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     case 0x190 ... 0x19f: /* setcc Gv */
         modrm = cpu_ldub_code(env, s->pc++);
         gen_setcc1(s, b, cpu_T[0]);
-        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
+        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         if (!(s->cpuid_features & CPUID_CMOV)) {
@@ -6657,7 +6809,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         if (mod != 3) {
             s->rip_offset = 1;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
         } else {
             gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -6688,7 +6840,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         rm = (modrm & 7) | REX_B(s);
         gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
         if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* specific case: we need to add a displacement */
             gen_exts(ot, cpu_T[1]);
             tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
@@ -6764,7 +6916,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         ot = dflag;
         modrm = cpu_ldub_code(env, s->pc++);
         reg = ((modrm >> 3) & 7) | rex_r;
-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_extu(ot, cpu_T[0]);

         /* Note that lzcnt and tzcnt are in different extensions.  */
@@ -6884,6 +7036,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_fwait(cpu_env);
         }
@@ -6903,6 +7056,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (CODE64(s))
             goto illegal_op;
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
         break;
@@ -6967,7 +7121,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (mod == 3)
             goto illegal_op;
         gen_op_mov_v_reg(ot, cpu_T[0], reg);
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         gen_jmp_im(pc_start - s->cs_base);
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         if (ot == MO_16) {
@@ -7095,6 +7249,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
             gen_eob(s);
@@ -7104,6 +7259,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
     case 0x105: /* syscall */
         /* XXX: is it usable in real mode ? */
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(pc_start - s->cs_base);
         gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
         gen_eob(s);
@@ -7113,6 +7269,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
             /* condition codes are modified only in long mode */
@@ -7133,6 +7290,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
             gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
             s->is_jmp = DISAS_TB_JUMP;
@@ -7149,7 +7307,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,ldt.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 2: /* lldt */
             if (!s->pe || s->vm86)
@@ -7158,7 +7316,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_lldt(cpu_env, cpu_tmp2_i32);
@@ -7170,7 +7328,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,tr.selector));
             ot = mod == 3 ? dflag : MO_16;
-            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
             break;
         case 3: /* ltr */
             if (!s->pe || s->vm86)
@@ -7179,7 +7337,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_jmp_im(pc_start - s->cs_base);
                 tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
                 gen_helper_ltr(cpu_env, cpu_tmp2_i32);
@@ -7189,7 +7347,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         case 5: /* verw */
             if (!s->pe || s->vm86)
                 goto illegal_op;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             gen_update_cc_op(s);
             if (op == 4) {
                 gen_helper_verr(cpu_env, cpu_T[0]);
@@ -7212,7 +7370,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             if (mod == 3)
                 goto illegal_op;
             gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
gdt.limit));
             gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
             gen_add_A0_im(s, 2);
@@ -7241,6 +7399,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                         s->cpl != 0)
                         goto illegal_op;
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     gen_helper_mwait(cpu_env, tcg_const_i32(s->pc -
pc_start));
                     gen_eob(s);
@@ -7268,7 +7427,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
             } else { /* sidt */
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
idt.limit));
                 gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
                 gen_add_A0_im(s, 2);
@@ -7371,7 +7530,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             } else {
                 gen_svm_check_intercept(s, pc_start,
                                         op==2 ? SVM_EXIT_GDTR_WRITE :
SVM_EXIT_IDTR_WRITE);
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
                 gen_add_A0_im(s, 2);
                 gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
@@ -7394,14 +7553,14 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
 #else
             tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
offsetof(CPUX86State,cr[0]));
 #endif
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
             break;
         case 6: /* lmsw */
             if (s->cpl != 0) {
                 gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
             } else {
                 gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
-                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
                 gen_helper_lmsw(cpu_env, cpu_T[0]);
                 gen_jmp_im(s->pc - s->cs_base);
                 gen_eob(s);
@@ -7413,8 +7572,9 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
                 } else {
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
-                    gen_lea_modrm(env, s, modrm);
+                    gen_lea_modrm(env, s, modrm, b);
                     gen_helper_invlpg(cpu_env, cpu_A0);
                     gen_jmp_im(s->pc - s->cs_base);
                     gen_eob(s);
@@ -7446,6 +7606,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                     if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
                         goto illegal_op;
                     gen_update_cc_op(s);
+                    gen_update_ep(s);
                     gen_jmp_im(pc_start - s->cs_base);
                     if (use_icount)
                         gen_io_start();
@@ -7493,7 +7654,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 }
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             } else {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
                 gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
             }
@@ -7514,7 +7675,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             if (mod != 3) {
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
                 gen_op_ld_v(s, ot, t0, cpu_A0);
                 a0 = tcg_temp_local_new();
                 tcg_gen_mov_tl(a0, cpu_A0);
@@ -7556,7 +7717,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             ot = dflag != MO_16 ? MO_32 : MO_16;
             modrm = cpu_ldub_code(env, s->pc++);
             reg = ((modrm >> 3) & 7) | rex_r;
-            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
+            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
             t0 = tcg_temp_local_new();
             gen_update_cc_op(s);
             if (b == 0x102) {
@@ -7584,7 +7745,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         case 3: /* prefetchnt0 */
             if (mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             /* nothing more to do */
             break;
         default: /* nop (multi byte) */
@@ -7624,6 +7785,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             case 4:
             case 8:
                 gen_update_cc_op(s);
+                gen_update_ep(s);
                 gen_jmp_im(pc_start - s->cs_base);
                 if (b & 2) {
                     gen_op_mov_v_reg(ot, cpu_T[0], rm);
@@ -7696,7 +7858,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             goto illegal_op;
         reg = ((modrm >> 3) & 7) | rex_r;
         /* generate a generic store */
-        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
+        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
         break;
     case 0x1ae:
         modrm = cpu_ldub_code(env, s->pc++);
@@ -7704,6 +7866,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         op = (modrm >> 3) & 7;
         switch(op) {
         case 0: /* fxsave */
+            gen_update_ep(s);
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
                 (s->prefix & PREFIX_LOCK))
                 goto illegal_op;
@@ -7711,10 +7874,13 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
+            gen_update_ep(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag ==
MO_64));
+            gen_helper_fxsave(cpu_env, cpu_A0,
+                              tcg_const_i32(dflag == MO_32),
+                              tcg_const_i32(dflag == MO_64));
             break;
         case 1: /* fxrstor */
             if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
@@ -7724,10 +7890,12 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
                 break;
             }
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             gen_update_cc_op(s);
             gen_jmp_im(pc_start - s->cs_base);
-            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag ==
MO_64));
+            gen_helper_fxrstor(cpu_env, cpu_A0,
+                               tcg_const_i32(dflag == MO_32),
+                               tcg_const_i32(dflag == MO_64));
             break;
         case 2: /* ldmxcsr */
         case 3: /* stmxcsr */
@@ -7738,7 +7906,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
                 mod == 3)
                 goto illegal_op;
-            gen_lea_modrm(env, s, modrm);
+            gen_lea_modrm(env, s, modrm, b);
             if (op == 2) {
                 tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
                                     s->mem_index, MO_LEUL);
@@ -7763,7 +7931,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
                 /* clflush */
                 if (!(s->cpuid_features & CPUID_CLFLUSH))
                     goto illegal_op;
-                gen_lea_modrm(env, s, modrm);
+                gen_lea_modrm(env, s, modrm, b);
             }
             break;
         default:
@@ -7775,7 +7943,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
-        gen_lea_modrm(env, s, modrm);
+        gen_lea_modrm(env, s, modrm, b);
         /* ignore for now */
         break;
     case 0x1aa: /* rsm */
@@ -7783,6 +7951,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
         if (!(s->flags & HF_SMM_MASK))
             goto illegal_op;
         gen_update_cc_op(s);
+        gen_update_ep(s);
         gen_jmp_im(s->pc - s->cs_base);
         gen_helper_rsm(cpu_env);
         gen_eob(s);
@@ -7803,7 +7972,7 @@ static target_ulong disas_insn(CPUX86State *env,
DisasContext *s,
             ot = mo_64_32(dflag);
         }

-        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
         gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
         gen_op_mov_reg_v(ot, reg, cpu_T[0]);

@@ -7880,6 +8049,17 @@ void optimize_flags_init(void)
     cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State,
cc_src2),
                                      "cc_src2");

+    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
+                                      offsetof(CPUX86State, fpop), "fpop");
+    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
+                                     "fpip");
+    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
+                                     "fpdp");
+    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State,
fpds),
+                                     "fpds");
+    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State,
fpcs),
+                                     "fpcs");
+
     for (i = 0; i < CPU_NB_REGS; ++i) {
         cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
                                          offsetof(CPUX86State, regs[i]),
@@ -7924,6 +8104,8 @@ static inline void
gen_intermediate_code_internal(X86CPU *cpu,
     dc->singlestep_enabled = cs->singlestep_enabled;
     dc->cc_op = CC_OP_DYNAMIC;
     dc->cc_op_dirty = false;
+    dc->fp_op = FP_EP_INVALID;
+    dc->fp_ep_dirty = false;
     dc->cs_base = cs_base;
     dc->tb = tb;
     dc->popl_esp_hack = 0;
@@ -7997,6 +8179,9 @@ static inline void
gen_intermediate_code_internal(X86CPU *cpu,
             }
             tcg_ctx.gen_opc_pc[lj] = pc_ptr;
             gen_opc_cc_op[lj] = dc->cc_op;
+            gen_opc_fp_op[lj] = dc->fp_op;
+            gen_opc_fp_ip[lj] = dc->fp_ip;
+            gen_opc_fp_cs[lj] = dc->fp_cs;
             tcg_ctx.gen_opc_instr_start[lj] = 1;
             tcg_ctx.gen_opc_icount[lj] = num_insns;
         }
@@ -8080,6 +8265,7 @@ void gen_intermediate_code_pc(CPUX86State *env,
TranslationBlock *tb)
 void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int
pc_pos)
 {
     int cc_op;
+    uint16_t fp_op;
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
         int i;
@@ -8099,4 +8285,10 @@ void restore_state_to_opc(CPUX86State *env,
TranslationBlock *tb, int pc_pos)
     cc_op = gen_opc_cc_op[pc_pos];
     if (cc_op != CC_OP_DYNAMIC)
         env->cc_op = cc_op;
+    fp_op = gen_opc_fp_op[pc_pos];
+    if (fp_op & FP_EP_VALID) {
+        tcg_gen_movi_i32(cpu_fpop, fp_op);
+        tcg_gen_movi_tl(cpu_fpip, gen_opc_fp_ip[pc_pos]);
+        tcg_gen_movi_i32(cpu_fpcs, gen_opc_fp_cs[pc_pos]);
+    }
 }



On Sat, Jul 19, 2014 at 2:36 AM, Jaume Martí <jaume.martif@gmail.com> wrote:

> Hello,
>
> I attach a patch with the fix for the issues pointed out by Richard.
> Maybe it would be useful to have the option to disable this feature
> at compile time, for performance reasons.
> Please review and apply.
>
> Best regards,
> Jaume
>
> Signed-off-by: Jaume Marti Farriol (jaume.martif@gmail.com)
> diff --git a/linux-user/signal.c b/linux-user/signal.c
> index 1141054..73f8f6b 100644
> --- a/linux-user/signal.c
> +++ b/linux-user/signal.c
> @@ -865,7 +865,7 @@ static void setup_sigcontext(struct target_sigcontext
> *sc,
>      __put_user(env->regs[R_ESP], &sc->esp_at_signal);
>      __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);
>
> -        cpu_x86_fsave(env, fpstate_addr, 1);
> +        cpu_x86_fsave(env, fpstate_addr);
>          fpstate->status = fpstate->sw;
>          magic = 0xffff;
>      __put_user(magic, &fpstate->magic);
> @@ -1068,7 +1068,7 @@ restore_sigcontext(CPUX86State *env, struct
> target_sigcontext *sc, int *peax)
>                  if (!access_ok(VERIFY_READ, fpstate_addr,
>                                 sizeof(struct target_fpstate)))
>                          goto badframe;
> -                cpu_x86_frstor(env, fpstate_addr, 1);
> +                cpu_x86_frstor(env, fpstate_addr);
>   }
>
>          *peax = tswapl(sc->eax);
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index e634d83..4274ce3 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -819,10 +819,11 @@ typedef struct CPUX86State {
>      uint16_t fpuc;
>      uint8_t fptags[8];   /* 0 = valid, 1 = empty */
>      FPReg fpregs[8];
> -    /* KVM-only so far */
> -    uint16_t fpop;
> +    uint32_t fpop;
>      uint64_t fpip;
>      uint64_t fpdp;
> +    uint32_t fpcs;
> +    uint32_t fpds;
>
>      /* emulator internal variables */
>      float_status fp_status;
> @@ -1067,8 +1068,8 @@ floatx80 cpu_set_fp80(uint64_t mant, uint16_t upper);
>  /* the following helpers are only usable in user mode simulation as
>     they can trigger unexpected exceptions */
>  void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector);
> -void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
> -void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
> +void cpu_x86_fsave(CPUX86State *s, target_ulong ptr);
> +void cpu_x86_frstor(CPUX86State *s, target_ulong ptr);
>
>  /* you can call this signal handler from your SIGBUS and SIGSEGV
>     signal handlers to inform the virtual CPU of exceptions. non zero
> diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
> index 1b2900d..6886031 100644
> --- a/target-i386/fpu_helper.c
> +++ b/target-i386/fpu_helper.c
> @@ -56,6 +56,8 @@
>  #define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
>  #define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
>
> +#define FPUS(env) ((env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11))
> +
>  static inline void fpush(CPUX86State *env)
>  {
>      env->fpstt = (env->fpstt - 1) & 7;
> @@ -604,6 +606,10 @@ void helper_fninit(CPUX86State *env)
>      env->fptags[5] = 1;
>      env->fptags[6] = 1;
>      env->fptags[7] = 1;
> +    env->fpip = 0;
> +    env->fpcs = 0;
> +    env->fpdp = 0;
> +    env->fpds = 0;
>  }
>
>  /* BCD ops */
> @@ -961,13 +967,13 @@ void helper_fxam_ST0(CPUX86State *env)
>      }
>  }
>
> -void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
> -    int fpus, fptag, exp, i;
> +    int fptag, exp, i;
>      uint64_t mant;
>      CPU_LDoubleU tmp;
>
> -    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
>      fptag = 0;
>      for (i = 7; i >= 0; i--) {
>          fptag <<= 2;
> @@ -987,83 +993,150 @@ void helper_fstenv(CPUX86State *env,
> target_ulong ptr, int data32)
>              }
>          }
>      }
> +
>      if (data32) {
>          /* 32 bit */
> -        cpu_stl_data(env, ptr, env->fpuc);
> -        cpu_stl_data(env, ptr + 4, fpus);
> -        cpu_stl_data(env, ptr + 8, fptag);
> -        cpu_stl_data(env, ptr + 12, 0); /* fpip */
> -        cpu_stl_data(env, ptr + 16, 0); /* fpcs */
> -        cpu_stl_data(env, ptr + 20, 0); /* fpoo */
> -        cpu_stl_data(env, ptr + 24, 0); /* fpos */
> +        cpu_stw_data(env, ptr, env->fpuc);
> +        cpu_stw_data(env, ptr + 4, FPUS(env));
> +        cpu_stw_data(env, ptr + 8, fptag);
> +        if (protected_mode) {
> +            cpu_stl_data(env, ptr + 12, env->fpip);
> +            cpu_stl_data(env, ptr + 16,
> +                        ((env->fpop & 0x7ff) << 16) | (env->fpcs &
> 0xffff));
> +            cpu_stl_data(env, ptr + 20, env->fpdp);
> +            cpu_stl_data(env, ptr + 24, env->fpds);
> +        } else {
> +            /* Real mode  */
> +            cpu_stl_data(env, ptr + 12, env->fpip); /* fpip[15..00] */
> +            cpu_stl_data(env, ptr + 16, ((((env->fpip >> 16) & 0xffff) <<
> 12) |
> +                        (env->fpop & 0x7ff))); /* fpip[31..16], fpop */
> +            cpu_stl_data(env, ptr + 20, env->fpdp); /* fpdp[15..00] */
> +            cpu_stl_data(env, ptr + 24,
> +                        (env->fpdp >> 4) & 0xffff000); /* fpdp[31..16] */
> +        }
>      } else {
>          /* 16 bit */
>          cpu_stw_data(env, ptr, env->fpuc);
> -        cpu_stw_data(env, ptr + 2, fpus);
> +        cpu_stw_data(env, ptr + 2, FPUS(env));
>          cpu_stw_data(env, ptr + 4, fptag);
> -        cpu_stw_data(env, ptr + 6, 0);
> -        cpu_stw_data(env, ptr + 8, 0);
> -        cpu_stw_data(env, ptr + 10, 0);
> -        cpu_stw_data(env, ptr + 12, 0);
> +        if (protected_mode) {
> +            cpu_stw_data(env, ptr + 6, env->fpip);
> +            cpu_stw_data(env, ptr + 8, env->fpcs);
> +            cpu_stw_data(env, ptr + 10, env->fpdp);
> +            cpu_stw_data(env, ptr + 12, env->fpds);
> +        } else {
> +            /* Real mode  */
> +            cpu_stw_data(env, ptr + 6, env->fpip); /* fpip[15..0] */
> +            cpu_stw_data(env, ptr + 8, ((env->fpip >> 4) & 0xf000) |
> +                        (env->fpop & 0x7ff)); /* fpip[19..16], fpop */
> +            cpu_stw_data(env, ptr + 10, env->fpdp); /* fpdp[15..0] */
> +            cpu_stw_data(env, ptr + 12,
> +                        (env->fpdp >> 4) & 0xf000); /* fpdp[19..16] */
> +        }
>      }
> +
> +    env->fpip = 0;
> +    env->fpcs = 0;
> +    env->fpdp = 0;
> +    env->fpds = 0;
>  }
>
> -void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
> -    int i, fpus, fptag;
> +    int tmp, i, fpus, fptag;
>
>      if (data32) {
> +        /* 32 bit */
>          env->fpuc = cpu_lduw_data(env, ptr);
>          fpus = cpu_lduw_data(env, ptr + 4);
>          fptag = cpu_lduw_data(env, ptr + 8);
> +        if (protected_mode) {
> +            env->fpip = cpu_ldl_data(env, ptr + 12);
> +            tmp = cpu_ldl_data(env, ptr + 16);
> +            env->fpcs = tmp & 0xffff;
> +            env->fpop = tmp >> 16;
> +            env->fpdp = cpu_ldl_data(env, ptr + 20);
> +            env->fpds = cpu_lduw_data(env, ptr + 24);
> +        } else {
> +            /* Real mode */
> +            tmp = cpu_ldl_data(env, ptr + 16);
> +            env->fpip = ((tmp & 0xffff000) << 4) |
> +                        cpu_lduw_data(env, ptr + 12);
> +            env->fpop = tmp & 0x7ff;
> +            env->fpdp = (cpu_ldl_data(env, ptr + 24) << 4) |
> +                        cpu_lduw_data(env, ptr + 20);
> +        }
>      } else {
> +        /* 16 bit */
>          env->fpuc = cpu_lduw_data(env, ptr);
>          fpus = cpu_lduw_data(env, ptr + 2);
>          fptag = cpu_lduw_data(env, ptr + 4);
> +        if (protected_mode) {
> +            /* Protected mode  */
> +            env->fpip = cpu_lduw_data(env, ptr + 6);
> +            env->fpcs = cpu_lduw_data(env, ptr + 8);
> +            env->fpdp = cpu_lduw_data(env, ptr + 10);
> +            env->fpds = cpu_lduw_data(env, ptr + 12);
> +        } else {
> +            /* Real mode  */
> +            tmp = cpu_lduw_data(env, ptr + 8);
> +            env->fpip = ((tmp & 0xf000) << 4) | cpu_lduw_data(env, ptr +
> 6);
> +            env->fpop = tmp & 0x7ff;
> +            env->fpdp = cpu_lduw_data(env, ptr + 12) << 4 |
> +                        cpu_lduw_data(env, ptr + 10);
> +        }
>      }
> +
>      env->fpstt = (fpus >> 11) & 7;
>      env->fpus = fpus & ~0x3800;
>      for (i = 0; i < 8; i++) {
>          env->fptags[i] = ((fptag & 3) == 3);
>          fptag >>= 2;
>      }
> +
> +    env->fpip &= 0xffffffff;
> +    env->fpdp &= 0xffffffff;
> +    if (!protected_mode) {
> +        env->fpcs = 0;
> +        env->fpds = 0;
> +    }
>  }
>
> -void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_fsave(CPUX86State *env, target_ulong ptr, int data32,
> +                  int protected_mode)
>  {
>      floatx80 tmp;
>      int i;
>
> -    helper_fstenv(env, ptr, data32);
> +    helper_fstenv(env, ptr, data32, protected_mode);
>
> -    ptr += (14 << data32);
> +    if (data32) {
> +        ptr += 28;
> +    } else {
> +        ptr += 14;
> +    }
>      for (i = 0; i < 8; i++) {
>          tmp = ST(i);
>          helper_fstt(env, tmp, ptr);
>          ptr += 10;
>      }
>
> -    /* fninit */
> -    env->fpus = 0;
> -    env->fpstt = 0;
> -    env->fpuc = 0x37f;
> -    env->fptags[0] = 1;
> -    env->fptags[1] = 1;
> -    env->fptags[2] = 1;
> -    env->fptags[3] = 1;
> -    env->fptags[4] = 1;
> -    env->fptags[5] = 1;
> -    env->fptags[6] = 1;
> -    env->fptags[7] = 1;
> +    helper_fninit(env);
>  }
>
> -void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
> +void helper_frstor(CPUX86State *env, target_ulong ptr, int data32,
> +                   int protected_mode)
>  {
>      floatx80 tmp;
>      int i;
>
> -    helper_fldenv(env, ptr, data32);
> -    ptr += (14 << data32);
> +    helper_fldenv(env, ptr, data32, protected_mode);
> +    if (data32) {
> +        ptr += 28;
> +    } else {
> +        ptr += 14;
> +    }
>
>      for (i = 0; i < 8; i++) {
>          tmp = helper_fldt(env, ptr);
> @@ -1072,21 +1145,22 @@ void helper_frstor(CPUX86State *env,
> target_ulong ptr, int data32)
>      }
>  }
>
> -#if defined(CONFIG_USER_ONLY)
> -void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
> +#if defined(CONFIG_USER_ONLY) && defined(TARGET_I386) && TARGET_ABI_BITS
> == 32
> +
> +void cpu_x86_fsave(CPUX86State *env, target_ulong ptr)
>  {
> -    helper_fsave(env, ptr, data32);
> +    helper_fsave(env, ptr, 1, 1);
>  }
>
> -void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
> +void cpu_x86_frstor(CPUX86State *env, target_ulong ptr)
>  {
> -    helper_frstor(env, ptr, data32);
> +    helper_frstor(env, ptr, 1, 1);
>  }
>  #endif
>
> -void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
> +void helper_fxsave(CPUX86State *env, target_ulong ptr, int data32, int
> data64)
>  {
> -    int fpus, fptag, i, nb_xmm_regs;
> +    int i, nb_xmm_regs, fptag;
>      floatx80 tmp;
>      target_ulong addr;
>
> @@ -1095,25 +1169,36 @@ void helper_fxsave(CPUX86State *env,
> target_ulong ptr, int data64)
>          raise_exception(env, EXCP0D_GPF);
>      }
>
> -    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
>      fptag = 0;
>      for (i = 0; i < 8; i++) {
>          fptag |= (env->fptags[i] << i);
>      }
> +    fptag ^= 0xff;
> +
>      cpu_stw_data(env, ptr, env->fpuc);
> -    cpu_stw_data(env, ptr + 2, fpus);
> -    cpu_stw_data(env, ptr + 4, fptag ^ 0xff);
> +    cpu_stw_data(env, ptr + 2, FPUS(env));
> +    cpu_stw_data(env, ptr + 4, fptag & 0xff);
> +    cpu_stw_data(env, ptr + 6, env->fpop);
> +
>  #ifdef TARGET_X86_64
>      if (data64) {
> -        cpu_stq_data(env, ptr + 0x08, 0); /* rip */
> -        cpu_stq_data(env, ptr + 0x10, 0); /* rdp */
> +        /* 64 bit */
> +        cpu_stq_data(env, ptr + 8, env->fpip);
> +        cpu_stq_data(env, ptr + 16, env->fpdp);
>      } else
>  #endif
>      {
> -        cpu_stl_data(env, ptr + 0x08, 0); /* eip */
> -        cpu_stl_data(env, ptr + 0x0c, 0); /* sel  */
> -        cpu_stl_data(env, ptr + 0x10, 0); /* dp */
> -        cpu_stl_data(env, ptr + 0x14, 0); /* sel  */
> +        if (data32) {
> +            /* 32 bit */
> +            cpu_stl_data(env, ptr + 8, env->fpip);
> +            cpu_stl_data(env, ptr + 16, env->fpdp);
> +        } else {
> +            /* 16 bit */
> +            cpu_stw_data(env, ptr + 8, env->fpip);
> +            cpu_stw_data(env, ptr + 16, env->fpdp);
> +        }
> +        cpu_stw_data(env, ptr + 12, env->fpcs & 0xffff);
> +        cpu_stw_data(env, ptr + 20, env->fpds & 0xffff);
>      }
>
>      addr = ptr + 0x20;
> @@ -1146,7 +1231,7 @@ void helper_fxsave(CPUX86State *env,
> target_ulong ptr, int data64)
>      }
>  }
>
> -void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
> +void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data32, int
> data64)
>  {
>      int i, fpus, fptag, nb_xmm_regs;
>      floatx80 tmp;
> @@ -1167,6 +1252,30 @@ void helper_fxrstor(CPUX86State *env,
> target_ulong ptr, int data64)
>          env->fptags[i] = ((fptag >> i) & 1);
>      }
>
> +    env->fpop = (cpu_lduw_data(env, ptr + 6) >> 5) & 0x7ff;
> +
> +#ifdef TARGET_X86_64
> +    if (data64) {
> +        /* 64 bit */
> +        env->fpip = cpu_ldq_data(env, ptr + 8);
> +        env->fpdp = cpu_ldq_data(env, ptr + 16);
> +    } else
> +#endif
> +    {
> +        if (data32) {
> +            /* 32 bit */
> +            env->fpip = cpu_ldl_data(env, ptr + 8);
> +            env->fpdp = cpu_ldl_data(env, ptr + 16);
> +        } else {
> +            /* 16 bit */
> +            env->fpip = cpu_lduw_data(env, ptr + 8);
> +            env->fpdp = cpu_lduw_data(env, ptr + 16);
> +        }
> +
> +        env->fpcs = cpu_lduw_data(env, ptr + 12);
> +        env->fpds = cpu_lduw_data(env, ptr + 20);
> +    }
> +
>      addr = ptr + 0x20;
>      for (i = 0; i < 8; i++) {
>          tmp = helper_fldt(env, addr);
> @@ -1195,6 +1304,11 @@ void helper_fxrstor(CPUX86State *env,
> target_ulong ptr, int data64)
>              }
>          }
>      }
> +
> +    if (!data64) {
> +        env->fpip &= 0xffffffff;
> +        env->fpdp &= 0xffffffff;
> +    }
>  }
>
>  void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
> diff --git a/target-i386/helper.h b/target-i386/helper.h
> index 8eb0145..9c4fd22 100644
> --- a/target-i386/helper.h
> +++ b/target-i386/helper.h
> @@ -183,12 +183,12 @@ DEF_HELPER_1(frndint, void, env)
>  DEF_HELPER_1(fscale, void, env)
>  DEF_HELPER_1(fsin, void, env)
>  DEF_HELPER_1(fcos, void, env)
> -DEF_HELPER_3(fstenv, void, env, tl, int)
> -DEF_HELPER_3(fldenv, void, env, tl, int)
> -DEF_HELPER_3(fsave, void, env, tl, int)
> -DEF_HELPER_3(frstor, void, env, tl, int)
> -DEF_HELPER_3(fxsave, void, env, tl, int)
> -DEF_HELPER_3(fxrstor, void, env, tl, int)
> +DEF_HELPER_4(fstenv, void, env, tl, int, int)
> +DEF_HELPER_4(fldenv, void, env, tl, int, int)
> +DEF_HELPER_4(fsave, void, env, tl, int, int)
> +DEF_HELPER_4(frstor, void, env, tl, int, int)
> +DEF_HELPER_4(fxsave, void, env, tl, int, int)
> +DEF_HELPER_4(fxrstor, void, env, tl, int, int)
>
>  DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
>  DEF_HELPER_FLAGS_1(ctz, TCG_CALL_NO_RWG_SE, tl, tl)
> diff --git a/target-i386/machine.c b/target-i386/machine.c
> index 16d2f6a..500f04f 100644
> --- a/target-i386/machine.c
> +++ b/target-i386/machine.c
> @@ -397,7 +397,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = {
>      .version_id = 1,
>      .minimum_version_id = 1,
>      .fields = (VMStateField[]) {
> -        VMSTATE_UINT16(env.fpop, X86CPU),
> +        VMSTATE_UINT32(env.fpop, X86CPU),
>          VMSTATE_UINT64(env.fpip, X86CPU),
>          VMSTATE_UINT64(env.fpdp, X86CPU),
>          VMSTATE_END_OF_LIST()
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 6fcd824..8e490de 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -58,6 +58,9 @@
>  #endif
>
>  //#define MACRO_TEST   1
> +#define IS_PROTECTED_MODE(s) (s->pe && !s->vm86)
> +#define FP_EP_VALID 0x80000000
> +#define FP_EP_INVALID 0
>
>  /* global register indexes */
>  static TCGv_ptr cpu_env;
> @@ -65,6 +68,11 @@ static TCGv cpu_A0;
>  static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
>  static TCGv_i32 cpu_cc_op;
>  static TCGv cpu_regs[CPU_NB_REGS];
> +static TCGv_i32 cpu_fpop;
> +static TCGv cpu_fpip;
> +static TCGv cpu_fpdp;
> +static TCGv_i32 cpu_fpds;
> +static TCGv_i32 cpu_fpcs;
>  /* local temps */
>  static TCGv cpu_T[2];
>  /* local register indexes (only used inside old micro ops) */
> @@ -74,6 +82,9 @@ static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
>  static TCGv_i64 cpu_tmp1_i64;
>
>  static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
> +static uint16_t gen_opc_fp_op[OPC_BUF_SIZE];
> +static uint16_t gen_opc_fp_cs[OPC_BUF_SIZE];
> +static target_ulong gen_opc_fp_ip[OPC_BUF_SIZE];
>
>  #include "exec/gen-icount.h"
>
> @@ -104,6 +115,10 @@ typedef struct DisasContext {
>      int ss32;   /* 32 bit stack segment */
>      CCOp cc_op;  /* current CC operation */
>      bool cc_op_dirty;
> +    uint16_t fp_op;
> +    bool fp_ep_dirty;
> +    target_ulong fp_ip;
> +    uint16_t fp_cs;
>      int addseg; /* non zero if either DS/ES/SS have a non zero base */
>      int f_st;   /* currently unused */
>      int vm86;   /* vm86 mode */
> @@ -208,6 +223,62 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
>      [CC_OP_CLR] = 0,
>  };
>
> +static inline bool instr_is_x87_nc(int modrm, int b)
> +{
> +    int op, mod, rm;
> +    switch (b) {
> +    case 0xd8 ... 0xdf:
> +        /* floats */
> +        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
> +        mod = (modrm >> 6) & 3;
> +        rm = modrm & 7;
> +        if (mod != 3) {
> +            /* memory */
> +            switch (op) {
> +            case 0x0c: /* fldenv */
> +            case 0x0d: /* fldcw */
> +            case 0x0e: /* fstenv, fnstenv */
> +            case 0x0f: /* fstcw, fnstcw */
> +            case 0x2c: /* frstor */
> +            case 0x2e: /* fsave, fnsave */
> +            case 0x2f: /* fstsw, fnstsw */
> +                return false;
> +            default:
> +                return true;
> +            }
> +        } else {
> +            /* register */
> +            switch (op) {
> +            case 0x0a:
> +                return false; /* fnop, Illegal op */
> +            case 0x0e: /* fdecstp, fincstp */
> +            case 0x28: /* ffree */
> +                return false;
> +            case 0x1c:
> +                switch (rm) {
> +                case 1: /* feni */
> +                    return true;
> +                case 2: /* fclex, fnclex */
> +                case 3: /* finit, fninit */
> +                    return false;
> +                case 4: /* fsetpm */
> +                    return true;
> +                default: /* Illegal op */
> +                    return false;
> +                }
> +            case 0x3c:
> +                return false; /* fstsw, fnstsw, Illegal op */
> +            default:
> +                return true;
> +            }
> +        }
> +    /*case 0x9b: // fwait, wait
> +        return false;*/
> +    default:
> +        return false;
> +    }
> +}
> +
>  static void set_cc_op(DisasContext *s, CCOp op)
>  {
>      int dead;
> @@ -253,6 +324,23 @@ static void gen_update_cc_op(DisasContext *s)
>      }
>  }
>
> +static void set_ep(DisasContext *s, int fp_op, int fp_ip, int fp_cs) {
> +    s->fp_op = FP_EP_VALID | fp_op;
> +    s->fp_ip = fp_ip;
> +    s->fp_cs = fp_cs;
> +    s->fp_ep_dirty = true;
> +}
> +
> +static void gen_update_ep(DisasContext *s)
> +{
> +    if (s->fp_ep_dirty) {
> +        tcg_gen_movi_i32(cpu_fpop, s->fp_op);
> +        tcg_gen_movi_tl(cpu_fpip, s->fp_ip);
> +        tcg_gen_movi_i32(cpu_fpcs, s->fp_cs);
> +        s->fp_ep_dirty = false;
> +    }
> +}
> +
>  #ifdef TARGET_X86_64
>
>  #define NB_OP_SIZES 4
> @@ -666,6 +754,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp
> ot, target_ulong cur_eip,
>      state_saved = 0;
>      if (s->pe && (s->cpl > s->iopl || s->vm86)) {
>          gen_update_cc_op(s);
> +        gen_update_ep(s);
>          gen_jmp_im(cur_eip);
>          state_saved = 1;
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> @@ -686,6 +775,7 @@ static void gen_check_io(DisasContext *s, TCGMemOp
> ot, target_ulong cur_eip,
>      if(s->flags & HF_SVMI_MASK) {
>          if (!state_saved) {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(cur_eip);
>          }
>          svm_flags |= (1 << (4 + ot));
> @@ -1097,6 +1187,7 @@ static inline void gen_jcc1(DisasContext *s, int
> b, int l1)
>      CCPrepare cc = gen_prepare_cc(s, b, cpu_T[0]);
>
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      if (cc.mask != -1) {
>          tcg_gen_andi_tl(cpu_T[0], cc.reg, cc.mask);
>          cc.reg = cpu_T[0];
> @@ -1580,14 +1671,14 @@ static void gen_rot_rm_T1(DisasContext *s,
> TCGMemOp ot, int op1, int is_right)
>      t0 = tcg_const_i32(0);
>      t1 = tcg_temp_new_i32();
>      tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
> -    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
> +    tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
>      tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
>      tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
>                          cpu_tmp2_i32, cpu_tmp3_i32);
>      tcg_temp_free_i32(t0);
>      tcg_temp_free_i32(t1);
>
> -    /* The CC_OP value is no longer predictable.  */
> +    /* The CC_OP value is no longer predictable.  */
>      set_cc_op(s, CC_OP_DYNAMIC);
>  }
>
> @@ -1863,7 +1954,7 @@ static void gen_shifti(DisasContext *s1, int op,
> TCGMemOp ot, int d, int c)
>      }
>  }
>
> -static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm)
> +static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm,
> int b)
>  {
>      target_long disp;
>      int havesib;
> @@ -1871,6 +1962,7 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>      int index;
>      int scale;
>      int mod, rm, code, override, must_add_seg;
> +    int curr_instr_is_x87_nc;
>      TCGv sum;
>
>      override = s->override;
> @@ -1950,6 +2042,13 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>              tcg_gen_addi_tl(cpu_A0, sum, disp);
>          }
>
> +        curr_instr_is_x87_nc = instr_is_x87_nc(modrm, b);
> +        if (curr_instr_is_x87_nc) {
> +            tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +            if (s->aflag == MO_32) {
> +                tcg_gen_ext32u_tl(cpu_fpdp, cpu_fpdp);
> +            }
> +        }
>          if (must_add_seg) {
>              if (override < 0) {
>                  if (base == R_EBP || base == R_ESP) {
> @@ -1961,6 +2060,12 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>
>              tcg_gen_ld_tl(cpu_tmp0, cpu_env,
>                            offsetof(CPUX86State, segs[override].base));
> +
> +            if (curr_instr_is_x87_nc) {
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State,
> segs[override].selector));
> +            }
> +
>              if (CODE64(s)) {
>                  if (s->aflag == MO_32) {
>                      tcg_gen_ext32u_tl(cpu_A0, cpu_A0);
> @@ -1970,6 +2075,11 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>              }
>
>              tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> +        } else {
> +            if (curr_instr_is_x87_nc) {
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[R_DS].selector));
> +            }
>          }
>
>          if (s->aflag == MO_32) {
> @@ -2039,8 +2149,22 @@ static void gen_lea_modrm(CPUX86State *env,
> DisasContext *s, int modrm)
>                      override = R_DS;
>                  }
>              }
> +            if (instr_is_x87_nc(modrm, b)) {
> +                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State,
> segs[override].selector));
> +            }
>              gen_op_addl_A0_seg(s, override);
> +        } else {
> +            if (instr_is_x87_nc(modrm, b)) {
> +                tcg_gen_mov_tl(cpu_fpdp, cpu_A0);
> +                tcg_gen_ld_i32(cpu_fpds, cpu_env,
> +                              offsetof(CPUX86State, segs[R_DS].selector));
> +            }
>          }
> +#ifdef TARGET_X86_64
> +        tcg_gen_andi_tl(cpu_fpdp, cpu_fpdp, 0xffffffff);
> +#endif
>          break;
>
>      default:
> @@ -2130,7 +2254,7 @@ static void gen_add_A0_ds_seg(DisasContext *s)
>  /* generate modrm memory load or store of 'reg'. TMP0 is used if reg ==
>     OR_TMP0 */
>  static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm,
> -                           TCGMemOp ot, int reg, int is_store)
> +                           TCGMemOp ot, int reg, int is_store, int b)
>  {
>      int mod, rm;
>
> @@ -2147,7 +2271,7 @@ static void gen_ldst_modrm(CPUX86State *env,
> DisasContext *s, int modrm,
>                  gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>          }
>      } else {
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          if (is_store) {
>              if (reg != OR_TMP0)
>                  gen_op_mov_v_reg(ot, cpu_T[0], reg);
> @@ -2250,7 +2374,7 @@ static void gen_cmovcc1(CPUX86State *env,
> DisasContext *s, TCGMemOp ot, int b,
>  {
>      CCPrepare cc;
>
> -    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>      cc = gen_prepare_cc(s, b, cpu_T[1]);
>      if (cc.mask != -1) {
> @@ -2297,6 +2421,7 @@ static void gen_movl_seg_T0(DisasContext *s, int
> seg_reg, target_ulong cur_eip)
>      if (s->pe && !s->vm86) {
>          /* XXX: optimize by finding processor state dynamically */
>          gen_update_cc_op(s);
> +        gen_update_ep(s);
>          gen_jmp_im(cur_eip);
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>          gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), cpu_tmp2_i32);
> @@ -2326,6 +2451,7 @@ gen_svm_check_intercept_param(DisasContext *s,
> target_ulong pc_start,
>      if (likely(!(s->flags & HF_SVMI_MASK)))
>          return;
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      gen_jmp_im(pc_start - s->cs_base);
>      gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type),
>                                           tcg_const_i64(param));
> @@ -2513,6 +2639,7 @@ static void gen_enter(DisasContext *s, int
> esp_addend, int level)
>  static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip)
>  {
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      gen_jmp_im(cur_eip);
>      gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno));
>      s->is_jmp = DISAS_TB_JUMP;
> @@ -2524,6 +2651,7 @@ static void gen_interrupt(DisasContext *s, int intno,
>                            target_ulong cur_eip, target_ulong next_eip)
>  {
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      gen_jmp_im(cur_eip);
>      gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno),
>                                 tcg_const_i32(next_eip - cur_eip));
> @@ -2533,6 +2661,7 @@ static void gen_interrupt(DisasContext *s, int intno,
>  static void gen_debug(DisasContext *s, target_ulong cur_eip)
>  {
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      gen_jmp_im(cur_eip);
>      gen_helper_debug(cpu_env);
>      s->is_jmp = DISAS_TB_JUMP;
> @@ -2543,6 +2672,7 @@ static void gen_debug(DisasContext *s,
> target_ulong cur_eip)
>  static void gen_eob(DisasContext *s)
>  {
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      if (s->tb->flags & HF_INHIBIT_IRQ_MASK) {
>          gen_helper_reset_inhibit_irq(cpu_env);
>      }
> @@ -2564,6 +2694,7 @@ static void gen_eob(DisasContext *s)
>  static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num)
>  {
>      gen_update_cc_op(s);
> +    gen_update_ep(s);
>      set_cc_op(s, CC_OP_DYNAMIC);
>      if (s->jmp_opt) {
>          gen_goto_tb(s, tb_num, eip);
> @@ -3043,7 +3174,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x0e7: /* movntq */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              break;
>          case 0x1e7: /* movntdq */
> @@ -3051,20 +3182,20 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12b: /* movntps */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              break;
>          case 0x3f0: /* lddqu */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              break;
>          case 0x22b: /* movntss */
>          case 0x32b: /* movntsd */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (b1 & 1) {
>                  gen_stq_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
> @@ -3076,13 +3207,13 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x6e: /* movd mm, ea */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
>                  tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,fpregs[reg].mmx));
>              } else
>  #endif
>              {
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,fpregs[reg].mmx));
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_movl_mm_T0_mmx(cpu_ptr0, cpu_tmp2_i32);
> @@ -3091,15 +3222,15 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x16e: /* movd xmm, ea */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
>                  gen_helper_movq_mm_T0_xmm(cpu_ptr0, cpu_T[0]);
>              } else
>  #endif
>              {
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0);
> -                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0, b);
> +                tcg_gen_addi_ptr(cpu_ptr0, cpu_env,
>                                   offsetof(CPUX86State,xmm_regs[reg]));
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_movl_mm_T0_xmm(cpu_ptr0, cpu_tmp2_i32);
> @@ -3107,7 +3238,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x6f: /* movq mm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              } else {
>                  rm = (modrm & 7);
> @@ -3124,7 +3255,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x16f: /* movdqa xmm, ea */
>          case 0x26f: /* movdqu xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3134,7 +3265,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x210: /* movss xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_32, cpu_T[0], cpu_A0);
>                  tcg_gen_st32_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
>                  tcg_gen_movi_tl(cpu_T[0], 0);
> @@ -3149,7 +3280,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x310: /* movsd xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>                  tcg_gen_movi_tl(cpu_T[0], 0);
> @@ -3164,7 +3295,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x012: /* movlps */
>          case 0x112: /* movlpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3176,7 +3307,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x212: /* movsldup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3192,7 +3323,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x312: /* movddup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3206,7 +3337,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x016: /* movhps */
>          case 0x116: /* movhpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(1)));
>              } else {
> @@ -3218,7 +3349,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x216: /* movshdup */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3256,34 +3387,34 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x7e: /* movd ea, mm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T[0], cpu_env,
> +                tcg_gen_ld_i64(cpu_T[0], cpu_env,
>                                 offsetof(CPUX86State,fpregs[reg].mmx));
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> +                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
>                                 offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
>              }
>              break;
>          case 0x17e: /* movd ea, xmm */
>  #ifdef TARGET_X86_64
>              if (s->dflag == MO_64) {
> -                tcg_gen_ld_i64(cpu_T[0], cpu_env,
> +                tcg_gen_ld_i64(cpu_T[0], cpu_env,
>                                 offsetof(CPUX86State,xmm_regs[reg].XMM_Q(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1, b);
>              } else
>  #endif
>              {
> -                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> +                tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
>                                 offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
> -                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1);
> +                gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1, b);
>              }
>              break;
>          case 0x27e: /* movq xmm, ea */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_ldq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3295,7 +3426,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x7f: /* movq ea, mm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx));
>              } else {
>                  rm = (modrm & 7);
> @@ -3310,7 +3441,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x17f: /* movdqa ea, xmm */
>          case 0x27f: /* movdqu ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
>              } else {
>                  rm = (modrm & 7) | REX_B(s);
> @@ -3320,7 +3451,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x211: /* movss ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,xmm_regs[reg].XMM_L(0)));
>                  gen_op_st_v(s, MO_32, cpu_T[0], cpu_A0);
>              } else {
> @@ -3331,7 +3462,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x311: /* movsd ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3343,7 +3474,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x013: /* movlps */
>          case 0x113: /* movlpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3353,7 +3484,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x017: /* movhps */
>          case 0x117: /* movhpd */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(1)));
>              } else {
> @@ -3417,7 +3548,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12a: /* cvtpi2pd */
>              gen_helper_enter_mmx(cpu_env);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,mmx_t0);
>                  gen_ldq_env_A0(s, op2_offset);
>              } else {
> @@ -3440,7 +3571,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x22a: /* cvtsi2ss */
>          case 0x32a: /* cvtsi2sd */
>              ot = mo_64_32(s->dflag);
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>              op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
>              tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset);
>              if (ot == MO_32) {
> @@ -3462,7 +3593,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x12d: /* cvtpd2pi */
>              gen_helper_enter_mmx(cpu_env);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>                  gen_ldo_env_A0(s, op2_offset);
>              } else {
> @@ -3493,7 +3624,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0x32d: /* cvtsd2si */
>              ot = mo_64_32(s->dflag);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  if ((b >> 8) & 1) {
>                      gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.XMM_Q(0)));
>                  } else {
> @@ -3525,7 +3656,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          case 0xc4: /* pinsrw */
>          case 0x1c4:
>              s->rip_offset = 1;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              val = cpu_ldub_code(env, s->pc++);
>              if (b1) {
>                  val &= 7;
> @@ -3559,7 +3690,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              break;
>          case 0x1d6: /* movq ea, xmm */
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_stq_env_A0(s, offsetof(CPUX86State,
>                                             xmm_regs[reg].XMM_Q(0)));
>              } else {
> @@ -3626,7 +3757,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,xmm_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      switch (b) {
>                      case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
>                      case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
> @@ -3660,7 +3791,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,mmx_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldq_env_A0(s, op2_offset);
>                  }
>              }
> @@ -3701,7 +3832,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  }
>
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_regs[reg]);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  gen_helper_crc32(cpu_T[0], cpu_tmp2_i32,
>                                   cpu_T[0], tcg_const_i32(8 << ot));
>
> @@ -3729,7 +3860,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      ot = MO_64;
>                  }
>
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  if ((b & 1) == 0) {
>                      tcg_gen_qemu_ld_tl(cpu_T[0], cpu_A0,
>                                         s->mem_index, ot | MO_BE);
> @@ -3747,7 +3878,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  tcg_gen_andc_tl(cpu_T[0], cpu_regs[s->vex_v], cpu_T[0]);
>                  gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>                  gen_op_update1_cc();
> @@ -3764,7 +3895,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  {
>                      TCGv bound, zero;
>
> -                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                      /* Extract START, and shift the operand.
>                         Shifts larger than operand size get zeros.  */
>                      tcg_gen_ext8u_tl(cpu_A0, cpu_regs[s->vex_v]);
> @@ -3801,7 +3932,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  tcg_gen_ext8u_tl(cpu_T[1], cpu_regs[s->vex_v]);
>                  {
>                      TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
> @@ -3828,7 +3959,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  switch (ot) {
>                  default:
>                      tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> @@ -3854,7 +3985,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> @@ -3872,7 +4003,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  /* Note that by zero-extending the mask operand, we
>                     automatically handle zero-extending the result.  */
>                  if (ot == MO_64) {
> @@ -3892,7 +4023,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      int end_op;
>
>                      ot = mo_64_32(s->dflag);
> -                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                    gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>                      /* Re-use the carry-out from a previous round.  */
>                      TCGV_UNUSED(carry_in);
> @@ -3971,7 +4102,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  if (ot == MO_64) {
>                      tcg_gen_andi_tl(cpu_T[1], cpu_regs[s->vex_v], 63);
>                  } else {
> @@ -4003,7 +4134,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>
>                  switch (reg & 7) {
>                  case 1: /* blsr By,Ey */
> @@ -4062,7 +4193,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                  ot = mo_64_32(s->dflag);
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3)
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                  reg = ((modrm >> 3) & 7) | rex_r;
>                  val = cpu_ldub_code(env, s->pc++);
>                  switch (b) {
> @@ -4199,7 +4330,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,xmm_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldo_env_A0(s, op2_offset);
>                  }
>              } else {
> @@ -4208,7 +4339,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      op2_offset = offsetof(CPUX86State,fpregs[rm].mmx);
>                  } else {
>                      op2_offset = offsetof(CPUX86State,mmx_t0);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_ldq_env_A0(s, op2_offset);
>                  }
>              }
> @@ -4242,7 +4373,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>                      goto illegal_op;
>                  }
>                  ot = mo_64_32(s->dflag);
> -                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>                  b = cpu_ldub_code(env, s->pc++);
>                  if (ot == MO_64) {
>                      tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], b & 63);
> @@ -4278,7 +4409,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>              if (mod != 3) {
>                  int sz = 4;
>
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,xmm_t0);
>
>                  switch (b) {
> @@ -4326,7 +4457,7 @@ static void gen_sse(CPUX86State *env,
> DisasContext *s, int b,
>          } else {
>              op1_offset = offsetof(CPUX86State,fpregs[reg].mmx);
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  op2_offset = offsetof(CPUX86State,mmx_t0);
>                  gen_ldq_env_A0(s, op2_offset);
>              } else {
> @@ -4404,6 +4535,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      int modrm, reg, rm, mod, op, opreg, val;
>      target_ulong next_eip, tval;
>      int rex_w, rex_r;
> +    int fp_op, fp_ip, fp_cs;
>
>      if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
>          tcg_gen_debug_insn_start(pc_start);
> @@ -4595,7 +4727,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  mod = (modrm >> 6) & 3;
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      opreg = OR_TMP0;
>                  } else if (op == OP_XORL && rm == reg) {
>                  xor_zero:
> @@ -4616,7 +4748,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  reg = ((modrm >> 3) & 7) | rex_r;
>                  rm = (modrm & 7) | REX_B(s);
>                  if (mod != 3) {
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>                  } else if (op == OP_XORL && rm == reg) {
>                      goto xor_zero;
> @@ -4655,7 +4787,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      s->rip_offset = 1;
>                  else
>                      s->rip_offset = insn_const_size(ot);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  opreg = OR_TMP0;
>              } else {
>                  opreg = rm;
> @@ -4698,7 +4830,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (mod != 3) {
>              if (op == 0)
>                  s->rip_offset = insn_const_size(ot);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T[0], rm);
> @@ -4906,7 +5038,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              }
>          }
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (op >= 2 && op != 3 && op != 5)
>                  gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
> @@ -4946,6 +5078,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          do_lcall:
>              if (s->pe && !s->vm86) {
>                  gen_update_cc_op(s);
> +                gen_update_ep(s);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_lcall_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
> @@ -4973,6 +5106,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          do_ljmp:
>              if (s->pe && !s->vm86) {
>                  gen_update_cc_op(s);
> +                gen_update_ep(s);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_ljmp_protected(cpu_env, cpu_tmp2_i32, cpu_T[1],
> @@ -4998,7 +5132,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_op_mov_v_reg(ot, cpu_T[1], reg);
>          gen_op_testl_T0_T1_cc();
>          set_cc_op(s, CC_OP_LOGICB + ot);
> @@ -5073,7 +5207,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              s->rip_offset = insn_const_size(ot);
>          else if (b == 0x6b)
>              s->rip_offset = 1;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          if (b == 0x69) {
>              val = insn_get(env, s, ot);
>              tcg_gen_movi_tl(cpu_T[1], val);
> @@ -5130,7 +5264,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_op_mov_reg_v(ot, reg, cpu_T[1]);
>              gen_op_mov_reg_v(ot, rm, cpu_T[0]);
>          } else {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_mov_v_reg(ot, cpu_T[0], reg);
>              gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>              tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> @@ -5159,7 +5293,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  rm = (modrm & 7) | REX_B(s);
>                  gen_op_mov_v_reg(ot, t0, rm);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_mov_tl(a0, cpu_A0);
>                  gen_op_ld_v(s, ot, t0, a0);
>                  rm = 0; /* avoid warning */
> @@ -5207,16 +5341,16 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  goto illegal_op;
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_update_cc_op(s);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_helper_cmpxchg16b(cpu_env, cpu_A0);
>          } else
> -#endif
> +#endif
>          {
>              if (!(s->cpuid_features & CPUID_CX8))
>                  goto illegal_op;
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_update_cc_op(s);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_helper_cmpxchg8b(cpu_env, cpu_A0);
>          }
>          set_cc_op(s, CC_OP_EFLAGS);
> @@ -5266,7 +5400,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          } else {
>              /* NOTE: order is important too for MMU exceptions */
>              s->popl_esp_hack = 1 << ot;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              s->popl_esp_hack = 0;
>              gen_pop_update(s, ot);
>          }
> @@ -5352,7 +5486,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          reg = ((modrm >> 3) & 7) | rex_r;
>
>          /* generate a generic store */
> -        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
>          break;
>      case 0xc6:
>      case 0xc7: /* mov Ev, Iv */
> @@ -5361,7 +5495,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod != 3) {
>              s->rip_offset = insn_const_size(ot);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>          }
>          val = insn_get(env, s, ot);
>          tcg_gen_movi_tl(cpu_T[0], val);
> @@ -5377,7 +5511,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>          break;
>      case 0x8e: /* mov seg, Gv */
> @@ -5385,7 +5519,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          reg = (modrm >> 3) & 7;
>          if (reg >= 6 || reg == R_CS)
>              goto illegal_op;
> -        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>          gen_movl_seg_T0(s, reg, pc_start - s->cs_base);
>          if (reg == R_SS) {
>              /* if reg == SS, inhibit interrupts/trace */
> @@ -5408,7 +5542,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              goto illegal_op;
>          gen_op_movl_T0_seg(reg);
>          ot = mod == 3 ? dflag : MO_16;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>          break;
>
>      case 0x1b6: /* movzbS Gv, Eb */
> @@ -5450,7 +5584,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, s_ot, cpu_T[0], cpu_A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              }
> @@ -5468,7 +5602,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          s->override = -1;
>          val = s->addseg;
>          s->addseg = 0;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          s->addseg = val;
>          gen_op_mov_reg_v(ot, reg, cpu_A0);
>          break;
> @@ -5558,7 +5692,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_op_mov_reg_v(ot, rm, cpu_T[0]);
>              gen_op_mov_reg_v(ot, reg, cpu_T[1]);
>          } else {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_mov_v_reg(ot, cpu_T[0], reg);
>              /* for xchg, lock is implicit */
>              if (!(prefixes & PREFIX_LOCK))
> @@ -5593,7 +5727,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
>          gen_add_A0_im(s, 1 << ot);
>          /* load the segment first to handle exceptions properly */
> @@ -5624,7 +5758,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  if (shift == 2) {
>                      s->rip_offset = 1;
>                  }
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  opreg = OR_TMP0;
>              } else {
>                  opreg = (modrm & 7) | REX_B(s);
> @@ -5674,7 +5808,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          reg = ((modrm >> 3) & 7) | rex_r;
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              opreg = OR_TMP0;
>          } else {
>              opreg = rm;
> @@ -5705,7 +5839,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          op = ((b & 7) << 3) | ((modrm >> 3) & 7);
>          if (mod != 3) {
>              /* memory op */
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              switch(op) {
>              case 0x00 ... 0x07: /* fxxxs */
>              case 0x10 ... 0x17: /* fixxxl */
> @@ -5832,7 +5966,9 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              case 0x0c: /* fldenv mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fldenv(cpu_env, cpu_A0, tcg_const_i32(dflag -
> 1));
> +                gen_helper_fldenv(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x0d: /* fldcw mem */
>                  tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
> @@ -5841,8 +5977,11 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  break;
>              case 0x0e: /* fnstenv mem */
>                  gen_update_cc_op(s);
> +                gen_update_ep(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fstenv(cpu_env, cpu_A0, tcg_const_i32(dflag -
> 1));
> +                gen_helper_fstenv(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x0f: /* fnstcw mem */
>                  gen_helper_fnstcw(cpu_tmp2_i32, cpu_env);
> @@ -5863,12 +6002,17 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>              case 0x2c: /* frstor mem */
>                  gen_update_cc_op(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_frstor(cpu_env, cpu_A0, tcg_const_i32(dflag -
> 1));
> +                gen_helper_frstor(cpu_env, cpu_A0,
> +                                  tcg_const_i32(dflag == MO_32),
> +                                  tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x2e: /* fnsave mem */
>                  gen_update_cc_op(s);
> +                gen_update_ep(s);
>                  gen_jmp_im(pc_start - s->cs_base);
> -                gen_helper_fsave(cpu_env, cpu_A0, tcg_const_i32(dflag -
> 1));
> +                gen_helper_fsave(cpu_env, cpu_A0,
> +                                 tcg_const_i32(dflag == MO_32),
> +                                 tcg_const_i32(IS_PROTECTED_MODE(s)));
>                  break;
>              case 0x2f: /* fnstsw mem */
>                  gen_helper_fnstsw(cpu_tmp2_i32, cpu_env);
> @@ -6209,6 +6353,12 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  goto illegal_op;
>              }
>          }
> +        if (instr_is_x87_nc(modrm, b)) {
> +            fp_op = ((b & 0x7) << 8) | (modrm & 0xff);
> +            fp_ip = pc_start - s->cs_base;
> +            fp_cs = env->segs[R_CS].selector;
> +            set_ep(s, fp_op, fp_ip, fp_cs);
> +        }
>          break;
>          /************************/
>          /* string ops */
> @@ -6393,6 +6543,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      do_lret:
>          if (s->pe && !s->vm86) {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1),
>                                        tcg_const_i32(val));
> @@ -6430,6 +6581,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              }
>          } else {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1),
>                                        tcg_const_i32(s->pc - s->cs_base));
> @@ -6527,7 +6679,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      case 0x190 ... 0x19f: /* setcc Gv */
>          modrm = cpu_ldub_code(env, s->pc++);
>          gen_setcc1(s, b, cpu_T[0]);
> -        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1);
> +        gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1, b);
>          break;
>      case 0x140 ... 0x14f: /* cmov Gv, Ev */
>          if (!(s->cpuid_features & CPUID_CMOV)) {
> @@ -6657,7 +6809,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          if (mod != 3) {
>              s->rip_offset = 1;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_op_ld_v(s, ot, cpu_T[0], cpu_A0);
>          } else {
>              gen_op_mov_v_reg(ot, cpu_T[0], rm);
> @@ -6688,7 +6840,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          rm = (modrm & 7) | REX_B(s);
>          gen_op_mov_v_reg(MO_32, cpu_T[1], reg);
>          if (mod != 3) {
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              /* specific case: we need to add a displacement */
>              gen_exts(ot, cpu_T[1]);
>              tcg_gen_sari_tl(cpu_tmp0, cpu_T[1], 3 + ot);
> @@ -6764,7 +6916,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          ot = dflag;
>          modrm = cpu_ldub_code(env, s->pc++);
>          reg = ((modrm >> 3) & 7) | rex_r;
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_extu(ot, cpu_T[0]);
>
>          /* Note that lzcnt and tzcnt are in different extensions.  */
> @@ -6884,6 +7036,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
>          } else {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_fwait(cpu_env);
>          }
> @@ -6903,6 +7056,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (CODE64(s))
>              goto illegal_op;
>          gen_update_cc_op(s);
> +        gen_update_ep(s);
>          gen_jmp_im(pc_start - s->cs_base);
>          gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start));
>          break;
> @@ -6967,7 +7121,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (mod == 3)
>              goto illegal_op;
>          gen_op_mov_v_reg(ot, cpu_T[0], reg);
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          gen_jmp_im(pc_start - s->cs_base);
>          tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>          if (ot == MO_16) {
> @@ -7095,6 +7249,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>          } else {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1));
>              gen_eob(s);
> @@ -7104,6 +7259,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>      case 0x105: /* syscall */
>          /* XXX: is it usable in real mode ? */
>          gen_update_cc_op(s);
> +        gen_update_ep(s);
>          gen_jmp_im(pc_start - s->cs_base);
>          gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start));
>          gen_eob(s);
> @@ -7113,6 +7269,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>          } else {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1));
>              /* condition codes are modified only in long mode */
> @@ -7133,6 +7290,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>          } else {
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
>              gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start));
>              s->is_jmp = DISAS_TB_JUMP;
> @@ -7149,7 +7307,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State,ldt.selector));
>              ot = mod == 3 ? dflag : MO_16;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              break;
>          case 2: /* lldt */
>              if (!s->pe || s->vm86)
> @@ -7158,7 +7316,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_lldt(cpu_env, cpu_tmp2_i32);
> @@ -7170,7 +7328,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State,tr.selector));
>              ot = mod == 3 ? dflag : MO_16;
> -            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1, b);
>              break;
>          case 3: /* ltr */
>              if (!s->pe || s->vm86)
> @@ -7179,7 +7337,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
>                  gen_helper_ltr(cpu_env, cpu_tmp2_i32);
> @@ -7189,7 +7347,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          case 5: /* verw */
>              if (!s->pe || s->vm86)
>                  goto illegal_op;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              gen_update_cc_op(s);
>              if (op == 4) {
>                  gen_helper_verr(cpu_env, cpu_T[0]);
> @@ -7212,7 +7370,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              if (mod == 3)
>                  goto illegal_op;
>              gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ);
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
> gdt.limit));
>              gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
>              gen_add_A0_im(s, 2);
> @@ -7241,6 +7399,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                          s->cpl != 0)
>                          goto illegal_op;
>                      gen_update_cc_op(s);
> +                    gen_update_ep(s);
>                      gen_jmp_im(pc_start - s->cs_base);
>                      gen_helper_mwait(cpu_env, tcg_const_i32(s->pc -
> pc_start));
>                      gen_eob(s);
> @@ -7268,7 +7427,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>              } else { /* sidt */
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State, idt.limit));
>                  gen_op_st_v(s, MO_16, cpu_T[0], cpu_A0);
>                  gen_add_A0_im(s, 2);
> @@ -7371,7 +7530,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              } else {
>                  gen_svm_check_intercept(s, pc_start,
>                                          op==2 ? SVM_EXIT_GDTR_WRITE :
> SVM_EXIT_IDTR_WRITE);
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_16, cpu_T[1], cpu_A0);
>                  gen_add_A0_im(s, 2);
>                  gen_op_ld_v(s, CODE64(s) + MO_32, cpu_T[0], cpu_A0);
> @@ -7394,14 +7553,14 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>  #else
>              tcg_gen_ld32u_tl(cpu_T[0], cpu_env,
> offsetof(CPUX86State,cr[0]));
>  #endif
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 1, b);
>              break;
>          case 6: /* lmsw */
>              if (s->cpl != 0) {
>                  gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>              } else {
>                  gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0);
> -                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +                gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>                  gen_helper_lmsw(cpu_env, cpu_T[0]);
>                  gen_jmp_im(s->pc - s->cs_base);
>                  gen_eob(s);
> @@ -7413,8 +7572,9 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
>                  } else {
>                      gen_update_cc_op(s);
> +                    gen_update_ep(s);
>                      gen_jmp_im(pc_start - s->cs_base);
> -                    gen_lea_modrm(env, s, modrm);
> +                    gen_lea_modrm(env, s, modrm, b);
>                      gen_helper_invlpg(cpu_env, cpu_A0);
>                      gen_jmp_im(s->pc - s->cs_base);
>                      gen_eob(s);
> @@ -7446,6 +7606,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                      if (!(s->cpuid_ext2_features & CPUID_EXT2_RDTSCP))
>                          goto illegal_op;
>                      gen_update_cc_op(s);
> +                    gen_update_ep(s);
>                      gen_jmp_im(pc_start - s->cs_base);
>                      if (use_icount)
>                          gen_io_start();
> @@ -7493,7 +7654,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  }
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              } else {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, MO_32 | MO_SIGN, cpu_T[0], cpu_A0);
>                  gen_op_mov_reg_v(d_ot, reg, cpu_T[0]);
>              }
> @@ -7514,7 +7675,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              mod = (modrm >> 6) & 3;
>              rm = modrm & 7;
>              if (mod != 3) {
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>                  gen_op_ld_v(s, ot, t0, cpu_A0);
>                  a0 = tcg_temp_local_new();
>                  tcg_gen_mov_tl(a0, cpu_A0);
> @@ -7556,7 +7717,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              ot = dflag != MO_16 ? MO_32 : MO_16;
>              modrm = cpu_ldub_code(env, s->pc++);
>              reg = ((modrm >> 3) & 7) | rex_r;
> -            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0);
> +            gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0, b);
>              t0 = tcg_temp_local_new();
>              gen_update_cc_op(s);
>              if (b == 0x102) {
> @@ -7584,7 +7745,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          case 3: /* prefetchnt0 */
>              if (mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              /* nothing more to do */
>              break;
>          default: /* nop (multi byte) */
> @@ -7624,6 +7785,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              case 4:
>              case 8:
>                  gen_update_cc_op(s);
> +                gen_update_ep(s);
>                  gen_jmp_im(pc_start - s->cs_base);
>                  if (b & 2) {
>                      gen_op_mov_v_reg(ot, cpu_T[0], rm);
> @@ -7696,7 +7858,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              goto illegal_op;
>          reg = ((modrm >> 3) & 7) | rex_r;
>          /* generate a generic store */
> -        gen_ldst_modrm(env, s, modrm, ot, reg, 1);
> +        gen_ldst_modrm(env, s, modrm, ot, reg, 1, b);
>          break;
>      case 0x1ae:
>          modrm = cpu_ldub_code(env, s->pc++);
> @@ -7704,6 +7866,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          op = (modrm >> 3) & 7;
>          switch(op) {
>          case 0: /* fxsave */
> +            gen_update_ep(s);
>              if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
>                  (s->prefix & PREFIX_LOCK))
>                  goto illegal_op;
> @@ -7711,10 +7874,13 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
>                  break;
>              }
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_update_cc_op(s);
> +            gen_update_ep(s);
>              gen_jmp_im(pc_start - s->cs_base);
> -            gen_helper_fxsave(cpu_env, cpu_A0, tcg_const_i32(dflag ==
> MO_64));
> +            gen_helper_fxsave(cpu_env, cpu_A0,
> +                              tcg_const_i32(dflag == MO_32),
> +                              tcg_const_i32(dflag == MO_64));
>              break;
>          case 1: /* fxrstor */
>              if (mod == 3 || !(s->cpuid_features & CPUID_FXSR) ||
> @@ -7724,10 +7890,12 @@ static target_ulong disas_insn(CPUX86State
> *env, DisasContext *s,
>                  gen_exception(s, EXCP07_PREX, pc_start - s->cs_base);
>                  break;
>              }
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              gen_update_cc_op(s);
>              gen_jmp_im(pc_start - s->cs_base);
> -            gen_helper_fxrstor(cpu_env, cpu_A0, tcg_const_i32(dflag ==
> MO_64));
> +            gen_helper_fxrstor(cpu_env, cpu_A0,
> +                               tcg_const_i32(dflag == MO_32),
> +                               tcg_const_i32(dflag == MO_64));
>              break;
>          case 2: /* ldmxcsr */
>          case 3: /* stmxcsr */
> @@ -7738,7 +7906,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK) ||
>                  mod == 3)
>                  goto illegal_op;
> -            gen_lea_modrm(env, s, modrm);
> +            gen_lea_modrm(env, s, modrm, b);
>              if (op == 2) {
>                  tcg_gen_qemu_ld_i32(cpu_tmp2_i32, cpu_A0,
>                                      s->mem_index, MO_LEUL);
> @@ -7763,7 +7931,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>                  /* clflush */
>                  if (!(s->cpuid_features & CPUID_CLFLUSH))
>                      goto illegal_op;
> -                gen_lea_modrm(env, s, modrm);
> +                gen_lea_modrm(env, s, modrm, b);
>              }
>              break;
>          default:
> @@ -7775,7 +7943,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          mod = (modrm >> 6) & 3;
>          if (mod == 3)
>              goto illegal_op;
> -        gen_lea_modrm(env, s, modrm);
> +        gen_lea_modrm(env, s, modrm, b);
>          /* ignore for now */
>          break;
>      case 0x1aa: /* rsm */
> @@ -7783,6 +7951,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>          if (!(s->flags & HF_SMM_MASK))
>              goto illegal_op;
>          gen_update_cc_op(s);
> +        gen_update_ep(s);
>          gen_jmp_im(s->pc - s->cs_base);
>          gen_helper_rsm(cpu_env);
>          gen_eob(s);
> @@ -7803,7 +7972,7 @@ static target_ulong disas_insn(CPUX86State *env,
> DisasContext *s,
>              ot = mo_64_32(dflag);
>          }
>
> -        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> +        gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0, b);
>          gen_helper_popcnt(cpu_T[0], cpu_env, cpu_T[0], tcg_const_i32(ot));
>          gen_op_mov_reg_v(ot, reg, cpu_T[0]);
>
> @@ -7880,6 +8049,17 @@ void optimize_flags_init(void)
>      cpu_cc_src2 = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State,
> cc_src2),
>                                       "cc_src2");
>
> +    cpu_fpop = tcg_global_mem_new_i32(TCG_AREG0,
> +                                      offsetof(CPUX86State, fpop),
> "fpop");
> +    cpu_fpip = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpip),
> +                                     "fpip");
> +    cpu_fpdp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUX86State, fpdp),
> +                                     "fpdp");
> +    cpu_fpds = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State,
> fpds),
> +                                     "fpds");
> +    cpu_fpcs = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUX86State,
> fpcs),
> +                                     "fpcs");
> +
>      for (i = 0; i < CPU_NB_REGS; ++i) {
>          cpu_regs[i] = tcg_global_mem_new(TCG_AREG0,
>                                           offsetof(CPUX86State, regs[i]),
> @@ -7924,6 +8104,8 @@ static inline void
> gen_intermediate_code_internal(X86CPU *cpu,
>      dc->singlestep_enabled = cs->singlestep_enabled;
>      dc->cc_op = CC_OP_DYNAMIC;
>      dc->cc_op_dirty = false;
> +    dc->fp_op = FP_EP_INVALID;
> +    dc->fp_ep_dirty = false;
>      dc->cs_base = cs_base;
>      dc->tb = tb;
>      dc->popl_esp_hack = 0;
> @@ -7997,6 +8179,9 @@ static inline void
> gen_intermediate_code_internal(X86CPU *cpu,
>              }
>              tcg_ctx.gen_opc_pc[lj] = pc_ptr;
>              gen_opc_cc_op[lj] = dc->cc_op;
> +            gen_opc_fp_op[lj] = dc->fp_op;
> +            gen_opc_fp_ip[lj] = dc->fp_ip;
> +            gen_opc_fp_cs[lj] = dc->fp_cs;
>              tcg_ctx.gen_opc_instr_start[lj] = 1;
>              tcg_ctx.gen_opc_icount[lj] = num_insns;
>          }
> @@ -8080,6 +8265,7 @@ void gen_intermediate_code_pc(CPUX86State *env,
> TranslationBlock *tb)
>  void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, int
> pc_pos)
>  {
>      int cc_op;
> +    uint16_t fp_op;
>  #ifdef DEBUG_DISAS
>      if (qemu_loglevel_mask(CPU_LOG_TB_OP)) {
>          int i;
> @@ -8099,4 +8285,10 @@ void restore_state_to_opc(CPUX86State *env,
> TranslationBlock *tb, int pc_pos)
>      cc_op = gen_opc_cc_op[pc_pos];
>      if (cc_op != CC_OP_DYNAMIC)
>          env->cc_op = cc_op;
> +    fp_op = gen_opc_fp_op[pc_pos];
> +    if (fp_op & FP_EP_VALID) {
> +        tcg_gen_movi_i32(cpu_fpop, fp_op);
> +        tcg_gen_movi_tl(cpu_fpip, gen_opc_fp_ip[pc_pos]);
> +        tcg_gen_movi_i32(cpu_fpcs, gen_opc_fp_cs[pc_pos]);
> +    }
>  }
>
> On Sun, Jun 22, 2014 at 9:17 PM, Jaume Martí <jaume.martif@gmail.com>
> wrote:
> > Thanks Richard for your feedback. I am going to correct the patch and
> > resubmit it.
> >
> > Best regards,
> > Jaume
> >
> > On Sun, Jun 22, 2014 at 8:55 PM, Richard Henderson <rth@twiddle.net> wrote:
> >> On 06/22/2014 07:55 AM, Jaume Martí wrote:
> >>> -        cpu_x86_fsave(env, fpstate_addr, 1);
> >>> -        fpstate->status = fpstate->sw;
> >>> -        magic = 0xffff;
> >>> +    cpu_x86_fsave(env, fpstate_addr);
> >>> +    fpstate->status = fpstate->sw;
> >>> +    magic = 0xffff;
> >>
> >> This patch needs to be split into format fixes and the actual change to be
> >> reviewed.
> >>
> >>> -    /* KVM-only so far */
> >>> -    uint16_t fpop;
> >>> +    union {
> >>> +        uint32_t tcg;
> >>> +        uint16_t kvm;
> >>> +    } fpop;
> >>
> >> This is highly questionable.
> >>
> >>>      .fields = (VMStateField[]) {
> >>> -        VMSTATE_UINT16(env.fpop, X86CPU),
> >>> +        VMSTATE_UINT16(env.fpop.kvm, X86CPU),
> >>
> >> You're breaking save/restore in tcg.  KVM is not required for migration.
> >>
> >>> +        if (non_control_x87_instr(modrm, b)) {
> >>> +            tcg_gen_movi_i32(cpu_fpop, ((b & 0x7) << 8) | (modrm &
> 0xff));
> >>> +            tcg_gen_movi_tl(cpu_fpip, pc_start - s->cs_base);
> >>> +            tcg_gen_movi_i32(cpu_fpcs, env->segs[R_CS].selector);
> >>> +        }
> >>
> >> I strongly suspect you can implement this feature without having to add 3
> >> (largely redundant) register writes to every x87 instruction executed.
> >>
> >> See how restore_state_to_opc works to compute the value of CC_OP during
> >> translation.  You can do the same thing to recover these three values.
> >>
> >> You do have to sync these values before normal exits from the TB, but you only
> >> have to do that once, not once for every insn executed.  See gen_update_cc_op.
> >>
> >>
> >> r~
>



-- 
Jaume

[-- Attachment #2: Type: text/html, Size: 229818 bytes --]

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2014-07-21 18:57 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-06-21  0:16 [Qemu-devel] PATCH for bugs 661696 and 1248376: target-i386: x87 exception pointers using TCG Jaume Martí
2014-06-22 14:55 ` Jaume Martí
2014-06-22 18:55   ` Richard Henderson
2014-06-22 19:17     ` Jaume Martí
2014-07-19  0:36       ` Jaume Martí
2014-07-21 18:55         ` Jaume Martí

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).