qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: richard.henderson@linaro.org
Subject: [PATCH v2 09/10] target/i386: implement 32-bit SYSENTER for linux-user
Date: Tue, 20 Jun 2023 17:16:33 +0200	[thread overview]
Message-ID: <20230620151634.21053-10-pbonzini@redhat.com> (raw)
In-Reply-To: <20230620151634.21053-1-pbonzini@redhat.com>

TCG reports the SEP feature (SYSENTER/SYSEXIT) in user mode emulation,
but does not plumb it into the linux-user run loop.  Split the helper into
system emulation and user-mode emulation cases and implement the latter.

SYSENTER does not save the return address (the old EIP), and therefore
Linux always makes it return to a fixed landing pad in the vsyscall
page.  Because QEMU does not provide the _contents_ of the vsyscall
page, the instructions that would execute there after SYSEXIT (the
pops of EBP, EDX and ECX, up to and including the first RET) have to
be emulated by hand.

Some corner cases, such as restarting a system call that has itself
overwritten the SYSENTER instruction, are not emulated correctly.
On Linux, the system call restart re-executes the SYSENTER instruction
in the vsyscall page, while on QEMU it re-executes the emulated
program's own instruction.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 linux-user/i386/cpu_loop.c          | 51 +++++++++++++++++++++++++++--
 target/i386/cpu.c                   |  9 ++++-
 target/i386/cpu.h                   |  1 +
 target/i386/helper.h                |  2 +-
 target/i386/tcg/seg_helper.c        | 33 -------------------
 target/i386/tcg/sysemu/seg_helper.c | 33 +++++++++++++++++++
 target/i386/tcg/translate.c         |  2 +-
 target/i386/tcg/user/seg_helper.c   | 16 +++++++++
 8 files changed, 109 insertions(+), 38 deletions(-)

diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c
index 6908bad14aa..690d9a42ee0 100644
--- a/linux-user/i386/cpu_loop.c
+++ b/linux-user/i386/cpu_loop.c
@@ -197,6 +197,41 @@ static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr)
     return false;
 }
 
+static void emulate_vsyscall_sysexit(CPUX86State *env)
+{
+    /*
+     * Emulate the pop and ret instructions after the sysenter instruction
+     * in the vsyscall page.  Any sysenter returns there, because sysenter
+     * does not save the old EIP!
+     */
+    abi_ulong word;
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->regs[R_EBP] = word;
+    env->regs[R_ESP] += sizeof(target_ulong);
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->regs[R_EDX] = word;
+    env->regs[R_ESP] += sizeof(target_ulong);
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->regs[R_ECX] = word;
+    env->regs[R_ESP] += sizeof(target_ulong);
+    if (get_user_ual(word, env->regs[R_ESP])) {
+        goto segv;
+    }
+    env->eip = word;
+    env->regs[R_ESP] += sizeof(target_ulong);
+    return;
+
+segv:
+    env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK;
+    force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, env->regs[R_ESP]);
+}
+
 void cpu_loop(CPUX86State *env)
 {
     CPUState *cs = env_cpu(env);
@@ -213,6 +248,7 @@ void cpu_loop(CPUX86State *env)
         case 0x80:
 #ifdef TARGET_ABI32
         case EXCP_SYSCALL:
+        case EXCP_SYSENTER:
 #endif
             /* linux syscall from int $0x80 */
             ret = do_syscall(env,
@@ -226,12 +262,18 @@ void cpu_loop(CPUX86State *env)
                              0, 0);
             if (ret == -QEMU_ERESTARTSYS) {
                 env->eip -= 2;
-            } else if (ret != -QEMU_ESIGRETURN) {
+                break;
+            }
+            if (ret != -QEMU_ESIGRETURN) {
                 env->regs[R_EAX] = ret;
             }
+            if (trapnr == EXCP_SYSENTER) {
+                emulate_vsyscall_sysexit(env);
+            }
             break;
 #ifndef TARGET_ABI32
         case EXCP_SYSCALL:
+        case EXCP_SYSENTER:
             /* linux syscall from syscall instruction */
             ret = do_syscall(env,
                              env->regs[R_EAX],
@@ -244,9 +286,14 @@ void cpu_loop(CPUX86State *env)
                              0, 0);
             if (ret == -QEMU_ERESTARTSYS) {
                 env->eip -= 2;
-            } else if (ret != -QEMU_ESIGRETURN) {
+                break;
+            }
+            if (ret != -QEMU_ESIGRETURN) {
                 env->regs[R_EAX] = ret;
             }
+            if (trapnr == EXCP_SYSENTER) {
+                emulate_vsyscall_sysexit(env);
+            }
             break;
 #endif
 #ifdef TARGET_X86_64
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 934360e4091..2c71c3ea32b 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -614,11 +614,18 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
           CPUID_PAT | CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | \
           CPUID_PAE | CPUID_SEP | CPUID_APIC)
 
+#if defined CONFIG_SOFTMMU || defined CONFIG_LINUX_USER
+#define TCG_NOBSD_FEATURES CPUID_SEP
+#else
+#define TCG_NOBSD_FEATURES 0
+#endif
+
 #define TCG_FEATURES (CPUID_FP87 | CPUID_PSE | CPUID_TSC | CPUID_MSR | \
           CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \
           CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | CPUID_PAT | \
           CPUID_PSE36 | CPUID_CLFLUSH | CPUID_ACPI | CPUID_MMX | \
-          CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE)
+          CPUID_FXSR | CPUID_SSE | CPUID_SSE2 | CPUID_SS | CPUID_DE | \
+          TCG_NOBSD_FEATURES)
           /* partly implemented:
           CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */
           /* missing:
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 7201a71de86..bc7d10bf863 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1185,6 +1185,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
 #define EXCP_VMEXIT     0x100 /* only for system emulation */
 #define EXCP_SYSCALL    0x101 /* only for user emulation */
 #define EXCP_VSYSCALL   0x102 /* only for user emulation */
+#define EXCP_SYSENTER   0x103 /* only for user emulation */
 
 /* i386-specific interrupt pending bits.  */
 #define CPU_INTERRUPT_POLL      CPU_INTERRUPT_TGT_EXT_1
diff --git a/target/i386/helper.h b/target/i386/helper.h
index c2e86c6119c..49d2f537557 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -49,7 +49,7 @@ DEF_HELPER_FLAGS_3(set_dr, TCG_CALL_NO_WG, void, env, int, tl)
 DEF_HELPER_FLAGS_2(get_dr, TCG_CALL_NO_WG, tl, env, int)
 #endif /* !CONFIG_USER_ONLY */
 
-DEF_HELPER_1(sysenter, void, env)
+DEF_HELPER_2(sysenter, void, env, int)
 DEF_HELPER_2(sysexit, void, env, int)
 DEF_HELPER_2(syscall, void, env, int)
 #ifdef TARGET_X86_64
diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index 03b58e94a2d..6899b8f6890 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -2147,39 +2147,6 @@ void helper_lret_protected(CPUX86State *env, int shift, int addend)
     helper_ret_protected(env, shift, 0, addend, GETPC());
 }
 
-void helper_sysenter(CPUX86State *env)
-{
-    if (env->sysenter_cs == 0) {
-        raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
-    }
-    env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
-
-#ifdef TARGET_X86_64
-    if (env->hflags & HF_LMA_MASK) {
-        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
-                               DESC_L_MASK);
-    } else
-#endif
-    {
-        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
-                               0, 0xffffffff,
-                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                               DESC_S_MASK |
-                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
-    }
-    cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
-                           0, 0xffffffff,
-                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
-                           DESC_S_MASK |
-                           DESC_W_MASK | DESC_A_MASK);
-    env->regs[R_ESP] = env->sysenter_esp;
-    env->eip = env->sysenter_eip;
-}
-
 void helper_sysexit(CPUX86State *env, int dflag)
 {
     int cpl;
diff --git a/target/i386/tcg/sysemu/seg_helper.c b/target/i386/tcg/sysemu/seg_helper.c
index 2c9bd007adb..967882b6c69 100644
--- a/target/i386/tcg/sysemu/seg_helper.c
+++ b/target/i386/tcg/sysemu/seg_helper.c
@@ -215,3 +215,36 @@ void helper_check_io(CPUX86State *env, uint32_t addr, uint32_t size)
         raise_exception_err_ra(env, EXCP0D_GPF, 0, retaddr);
     }
 }
+
+void helper_sysenter(CPUX86State *env, int next_eip_addend)
+{
+    if (env->sysenter_cs == 0) {
+        raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC());
+    }
+    env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
+
+#ifdef TARGET_X86_64
+    if (env->hflags & HF_LMA_MASK) {
+        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK |
+                               DESC_L_MASK);
+    } else
+#endif
+    {
+        cpu_x86_load_seg_cache(env, R_CS, env->sysenter_cs & 0xfffc,
+                               0, 0xffffffff,
+                               DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                               DESC_S_MASK |
+                               DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK);
+    }
+    cpu_x86_load_seg_cache(env, R_SS, (env->sysenter_cs + 8) & 0xfffc,
+                           0, 0xffffffff,
+                           DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
+                           DESC_S_MASK |
+                           DESC_W_MASK | DESC_A_MASK);
+    env->regs[R_ESP] = env->sysenter_esp;
+    env->eip = env->sysenter_eip;
+}
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 0ddb689444e..af74c842f96 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -5667,7 +5667,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu)
         if (!PE(s)) {
             gen_exception_gpf(s);
         } else {
-            gen_helper_sysenter(cpu_env);
+            gen_helper_sysenter(cpu_env, cur_insn_len_i32(s));
             s->base.is_jmp = DISAS_EOB_ONLY;
         }
         break;
diff --git a/target/i386/tcg/user/seg_helper.c b/target/i386/tcg/user/seg_helper.c
index c45f2ac2ba6..1ac3ee39b5b 100644
--- a/target/i386/tcg/user/seg_helper.c
+++ b/target/i386/tcg/user/seg_helper.c
@@ -36,6 +36,22 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
     cpu_loop_exit(cs);
 }
 
+void helper_sysenter(CPUX86State *env, int next_eip_addend)
+{
+    CPUState *cs = env_cpu(env);
+
+    /*
+     * sysenter returns to the landing pad of the vDSO, which pops
+     * ebp/edx/ecx before executing a "ret".
+     */
+    cs->exception_index = EXCP_SYSENTER;
+    env->exception_is_int = 0;
+
+    /* Used for ERESTARTSYS.  */
+    env->exception_next_eip = env->eip + next_eip_addend;
+    cpu_loop_exit(cs);
+}
+
 /*
  * fake user mode interrupt. is_int is TRUE if coming from the int
  * instruction. next_eip is the env->eip value AFTER the interrupt
-- 
2.40.1



  parent reply	other threads:[~2023-06-20 15:18 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-20 15:16 [PATCH v2 00/10] target/i386: add a few simple features Paolo Bonzini
2023-06-20 15:16 ` [PATCH v2 01/10] target/i386: fix INVD vmexit Paolo Bonzini
2023-06-20 15:16 ` [PATCH v2 02/10] target/i386: TCG supports 3DNow! prefetch(w) Paolo Bonzini
2023-06-20 15:16 ` [PATCH v2 03/10] target/i386: TCG supports RDSEED Paolo Bonzini
2023-06-20 16:24   ` Richard Henderson
2023-06-21  5:46     ` Paolo Bonzini
2023-06-20 15:16 ` [PATCH v2 04/10] target/i386: TCG supports XSAVEERPTR Paolo Bonzini
2023-06-20 15:16 ` [PATCH v2 05/10] target/i386: TCG supports WBNOINVD Paolo Bonzini
2023-06-20 15:16 ` [PATCH v2 06/10] target/i386: Intel only supports SYSCALL in long mode Paolo Bonzini
2023-06-20 15:57   ` Richard Henderson
2023-06-20 15:16 ` [PATCH v2 07/10] target/i386: sysret and sysexit are privileged Paolo Bonzini
2023-06-20 15:58   ` Richard Henderson
2023-06-20 15:16 ` [PATCH v2 08/10] target/i386: implement 32-bit SYSCALL for linux-user Paolo Bonzini
2023-06-20 16:10   ` Richard Henderson
2023-06-20 15:16 ` Paolo Bonzini [this message]
2023-06-20 16:22   ` [PATCH v2 09/10] target/i386: implement 32-bit SYSENTER " Richard Henderson
2023-06-20 16:27     ` Paolo Bonzini
2023-06-20 15:16 ` [PATCH v2 10/10] target/i386: implement RDPID in TCG Paolo Bonzini
2023-06-20 16:23   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230620151634.21053-10-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).