qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: Richard Henderson <richard.henderson@linaro.org>
Subject: [PULL 21/46] target/i386: introduce flags writeback mechanism
Date: Sun, 31 Dec 2023 09:44:37 +0100	[thread overview]
Message-ID: <20231231084502.235366-22-pbonzini@redhat.com> (raw)
In-Reply-To: <20231231084502.235366-1-pbonzini@redhat.com>

ALU instructions can write to both memory and flags.  If the CC_SRC*
and CC_DST locations have been written already when a memory access
causes a fault, the value in CC_SRC* and CC_DST might be interpreted
with the wrong CC_OP (the one that is in effect before the instruction.

Besides just using the wrong result for the flags, something like
subtracting -1 can have disastrous effects if the current CC_OP is
CC_OP_EFLAGS: this is because QEMU does not expect bits outside the ALU
flags to be set in CC_SRC, and env->eflags can end up set to all-ones.
In the case of the attached testcase, this sets IOPL to 3 and would
cause an assertion failure if SUB is moved to the new decoder.

This mechanism is not really needed for BMI instructions, which can
only write to a register, but put it to use anyway for cleanliness.
In the case of BZHI, the code has to be modified slightly to ensure
that decode->cc_src is written, otherwise the new assertions trigger.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/cpu.h                |  1 +
 target/i386/tcg/decode-new.c.inc | 34 +++++++++++++++++++++++++++++
 target/i386/tcg/decode-new.h     |  4 ++++
 target/i386/tcg/emit.c.inc       | 36 ++++++++++++++++++++-----------
 tests/tcg/i386/Makefile.target   |  2 +-
 tests/tcg/i386/test-flags.c      | 37 ++++++++++++++++++++++++++++++++
 6 files changed, 101 insertions(+), 13 deletions(-)
 create mode 100644 tests/tcg/i386/test-flags.c

diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index ecdd4518c64..7f0786e8b98 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1285,6 +1285,7 @@ typedef enum {
 
     CC_OP_NB,
 } CCOp;
+QEMU_BUILD_BUG_ON(CC_OP_NB >= 128);
 
 typedef struct SegmentCache {
     uint32_t selector;
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index f30889dbc0a..717d7307722 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -1662,6 +1662,7 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
     bool first = true;
     X86DecodedInsn decode;
     X86DecodeFunc decode_func = decode_root;
+    uint8_t cc_live;
 
     s->has_modrm = false;
 
@@ -1815,6 +1816,7 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
     }
 
     memset(&decode, 0, sizeof(decode));
+    decode.cc_op = -1;
     decode.b = b;
     if (!decode_insn(s, env, decode_func, &decode)) {
         goto illegal_op;
@@ -1953,6 +1955,38 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
         decode.e.gen(s, env, &decode);
         gen_writeback(s, &decode, 0, s->T0);
     }
+
+    /*
+     * Write back flags after last memory access.  Some newer ALU instructions, as
+     * well as SSE instructions, write flags in the gen_* function, but that can
+     * cause incorrect tracking of CC_OP for instructions that write to both memory
+     * and flags.
+     */
+    if (decode.cc_op != -1) {
+        if (decode.cc_dst) {
+            tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst);
+        }
+        if (decode.cc_src) {
+            tcg_gen_mov_tl(cpu_cc_src, decode.cc_src);
+        }
+        if (decode.cc_src2) {
+            tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2);
+        }
+        if (decode.cc_op == CC_OP_DYNAMIC) {
+            tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic);
+        }
+        set_cc_op(s, decode.cc_op);
+        cc_live = cc_op_live[decode.cc_op];
+    } else {
+        cc_live = 0;
+    }
+    if (decode.cc_op != CC_OP_DYNAMIC) {
+        assert(!decode.cc_op_dynamic);
+        assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST));
+        assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC));
+        assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2));
+    }
+
     return;
  gp_fault:
     gen_exception_gpf(s);
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 70b6717227f..25220fc4362 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -283,6 +283,10 @@ struct X86DecodedInsn {
     target_ulong immediate;
     AddressParts mem;
 
+    TCGv cc_dst, cc_src, cc_src2;
+    TCGv_i32 cc_op_dynamic;
+    int8_t cc_op;
+
     uint8_t b;
 };
 
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 4c2006fdd09..fd120e7b9b4 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -339,6 +339,19 @@ static inline int vector_len(DisasContext *s, X86DecodedInsn *decode)
     return s->vex_l ? 32 : 16;
 }
 
+static void prepare_update1_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op)
+{
+    decode->cc_dst = s->T0;
+    decode->cc_op = op;
+}
+
+static void prepare_update2_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op)
+{
+    decode->cc_src = s->T1;
+    decode->cc_dst = s->T0;
+    decode->cc_op = op;
+}
+
 static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs)
 {
     MemOp ot = decode->op[0].ot;
@@ -1027,6 +1040,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
 VSIB_AVX(VPGATHERD, vpgatherd)
 VSIB_AVX(VPGATHERQ, vpgatherq)
 
+/* ADCX/ADOX do not have memory operands and can use set_cc_op.  */
 static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
 {
     int opposite_cc_op;
@@ -1089,8 +1103,7 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     MemOp ot = decode->op[0].ot;
 
     tcg_gen_andc_tl(s->T0, s->T1, s->T0);
-    gen_op_update1_cc(s);
-    set_cc_op(s, CC_OP_LOGICB + ot);
+    prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
 static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
@@ -1118,10 +1131,10 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero);
     tcg_gen_andc_tl(s->T0, s->T0, s->T1);
 
-    gen_op_update1_cc(s);
-    set_cc_op(s, CC_OP_LOGICB + ot);
+    prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
+/* BLSI do not have memory operands and can use set_cc_op.  */
 static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1133,6 +1146,7 @@ static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     set_cc_op(s, CC_OP_BMILGB + ot);
 }
 
+/* BLSMSK do not have memory operands and can use set_cc_op.  */
 static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1144,6 +1158,7 @@ static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
     set_cc_op(s, CC_OP_BMILGB + ot);
 }
 
+/* BLSR do not have memory operands and can use set_cc_op.  */
 static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
@@ -1164,18 +1179,15 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 
     tcg_gen_ext8u_tl(s->T1, s->T1);
 
+    tcg_gen_shl_tl(s->A0, mone, s->T1);
+    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero);
+    tcg_gen_andc_tl(s->T0, s->T0, s->A0);
     /*
      * Note that since we're using BMILG (in order to get O
      * cleared) we need to store the inverse into C.
      */
-    tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound);
-
-    tcg_gen_shl_tl(s->A0, mone, s->T1);
-    tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero);
-    tcg_gen_andc_tl(s->T0, s->T0, s->A0);
-
-    gen_op_update1_cc(s);
-    set_cc_op(s, CC_OP_BMILGB + ot);
+    tcg_gen_setcond_tl(TCG_COND_LEU, s->T1, s->T1, bound);
+    prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
 static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index 3dec7c6c423..9906f9e116b 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -13,7 +13,7 @@ config-cc.mak: Makefile
 
 I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
 ALL_X86_TESTS=$(I386_SRCS:.c=)
-SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx
+SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx test-flags
 X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
 
 test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
diff --git a/tests/tcg/i386/test-flags.c b/tests/tcg/i386/test-flags.c
new file mode 100644
index 00000000000..c379e296275
--- /dev/null
+++ b/tests/tcg/i386/test-flags.c
@@ -0,0 +1,37 @@
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <signal.h>
+#include <stdio.h>
+#include <assert.h>
+
+volatile unsigned long flags;
+volatile unsigned long flags_after;
+int *addr;
+
+void sigsegv(int sig, siginfo_t *info, ucontext_t *uc)
+{
+    flags = uc->uc_mcontext.gregs[REG_EFL];
+    mprotect(addr, 4096, PROT_READ|PROT_WRITE);
+}
+
+int main()
+{
+    struct sigaction sa = { .sa_handler = (void *)sigsegv, .sa_flags = SA_SIGINFO };
+    sigaction(SIGSEGV, &sa, NULL);
+
+    /* fault in the page then protect it */
+    addr = mmap (NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+    *addr = 0x1234;
+    mprotect(addr, 4096, PROT_READ);
+
+    asm("# set flags to all ones    \n"
+        "mov $-1, %%eax             \n"
+        "movq addr, %%rdi           \n"
+        "sahf                       \n"
+        "sub %%eax, (%%rdi)         \n"
+        "pushf                      \n"
+        "pop  flags_after(%%rip)    \n" : : : "eax", "edi", "memory");
+
+    /* OF can have any value before the SUB instruction.  */
+    assert((flags & 0xff) == 0xd7 && (flags_after & 0x8ff) == 0x17);
+}
-- 
2.43.0



  parent reply	other threads:[~2023-12-31  8:51 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-31  8:44 [PULL 00/46] (mostly) target/i386 and meson changes for 2023-12-31 Paolo Bonzini
2023-12-31  8:44 ` [PULL 01/46] configure: use a native non-cross compiler for linux-user Paolo Bonzini
2023-12-31  8:44 ` [PULL 02/46] target/i386: optimize computation of JL and JLE from flags Paolo Bonzini
2023-12-31  8:44 ` [PULL 03/46] target/i386: speedup JO/SETO after MUL or IMUL Paolo Bonzini
2023-12-31  8:44 ` [PULL 04/46] target/i386: remove unnecessary arguments from raise_interrupt Paolo Bonzini
2023-12-31  8:44 ` [PULL 05/46] target/i386: remove unnecessary truncations Paolo Bonzini
2023-12-31  8:44 ` [PULL 06/46] target/i386: clean up cpu_cc_compute_all Paolo Bonzini
2023-12-31  8:44 ` [PULL 07/46] target/i386: document more deviations from the manual Paolo Bonzini
2023-12-31  8:44 ` [PULL 08/46] target/i386: reimplement check for validity of LOCK prefix Paolo Bonzini
2023-12-31  8:44 ` [PULL 09/46] target/i386: avoid trunc and ext for MULX and RORX Paolo Bonzini
2023-12-31  8:44 ` [PULL 10/46] target/i386: rename zext0/zext2 and make them closer to the manual Paolo Bonzini
2023-12-31  8:44 ` [PULL 11/46] target/i386: add X86_SPECIALs for MOVSX and MOVZX Paolo Bonzini
2023-12-31  8:44 ` [PULL 12/46] target/i386: do not decode string source/destination into decode->mem Paolo Bonzini
2023-12-31  8:44 ` [PULL 13/46] target/i386: do not clobber A0 in POP translation Paolo Bonzini
2023-12-31  8:44 ` [PULL 14/46] target/i386: do not clobber T0 on string operations Paolo Bonzini
2023-12-31  8:44 ` [PULL 15/46] target/i386: split eflags computation out of gen_compute_eflags Paolo Bonzini
2023-12-31  8:44 ` [PULL 16/46] target/i386: do not use s->tmp4 for push Paolo Bonzini
2023-12-31  8:44 ` [PULL 17/46] target/i386: do not use s->tmp0 for jumps on ECX ==/!= 0 Paolo Bonzini
2023-12-31  8:44 ` [PULL 18/46] target/i386: prepare for implementation of STOS/SCAS in new decoder Paolo Bonzini
2023-12-31  8:44 ` [PULL 19/46] target/i386: move operand load and writeback out of gen_cmovcc1 Paolo Bonzini
2023-12-31  8:44 ` [PULL 20/46] target/i386: adjust decoding of J operand Paolo Bonzini
2023-12-31  8:44 ` Paolo Bonzini [this message]
2023-12-31  8:44 ` [PULL 22/46] target/i386: implement CMPccXADD Paolo Bonzini
2023-12-31  8:44 ` [PULL 23/46] target/i386: the sgx_epc_get_section stub is reachable Paolo Bonzini
2023-12-31  8:44 ` [PULL 24/46] esp: check for NULL result from scsi_device_find() Paolo Bonzini
2023-12-31  8:44 ` [PULL 25/46] meson: fix type of "relocatable" option Paolo Bonzini
2023-12-31  8:44 ` [PULL 26/46] meson: remove unused variable Paolo Bonzini
2023-12-31  8:44 ` [PULL 27/46] meson: use version_compare() to compare version Paolo Bonzini
2023-12-31  8:44 ` [PULL 28/46] Makefile: clean qemu-iotests output Paolo Bonzini
2023-12-31  8:44 ` [PULL 29/46] configure: remove unnecessary subshell Paolo Bonzini
2023-12-31  8:44 ` [PULL 30/46] configure: unify again the case arms in probe_target_compiler Paolo Bonzini
2023-12-31  8:44 ` [PULL 31/46] meson: add more sections to main meson.build Paolo Bonzini
2023-12-31  8:44 ` [PULL 32/46] meson: move program checks together Paolo Bonzini
2023-12-31  8:44 ` [PULL 33/46] meson: move option validation together Paolo Bonzini
2023-12-31  8:44 ` [PULL 34/46] meson: move accelerator dependency checks together Paolo Bonzini
2023-12-31  8:44 ` [PULL 35/46] meson: keep subprojects together Paolo Bonzini
2023-12-31  8:44 ` [PULL 36/46] meson: move CFI detection code with other compiler flags Paolo Bonzini
2023-12-31  8:44 ` [PULL 37/46] meson: move config-host.h definitions together Paolo Bonzini
2023-12-31  8:44 ` [PULL 38/46] meson: move subdirs to "Collect sources" section Paolo Bonzini
2023-12-31  8:44 ` [PULL 39/46] meson: always probe u2f and canokey if the option is enabled Paolo Bonzini
2023-12-31  8:44 ` [PULL 40/46] meson: remove OS definitions from config_targetos Paolo Bonzini
2023-12-31  8:44 ` [PULL 41/46] meson: remove CONFIG_POSIX and CONFIG_WIN32 " Paolo Bonzini
2023-12-31  8:44 ` [PULL 42/46] meson: remove config_targetos Paolo Bonzini
2023-12-31  8:44 ` [PULL 43/46] meson: remove CONFIG_ALL Paolo Bonzini
2023-12-31  8:45 ` [PULL 44/46] meson: rename config_all Paolo Bonzini
2023-12-31  8:45 ` [PULL 45/46] configure, meson: rename targetos to host_os Paolo Bonzini
2023-12-31  8:45 ` [PULL 46/46] meson.build: report graphics backends separately Paolo Bonzini
2024-01-05 12:53 ` [PULL 00/46] (mostly) target/i386 and meson changes for 2023-12-31 Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231231084502.235366-22-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).