* [PULL 0/2] late fixes for rc4
@ 2023-12-12 21:46 Richard Henderson
2023-12-12 21:46 ` [PULL 1/2] target/i386: Fix 32-bit wrapping of pc/eip computation Richard Henderson
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Richard Henderson @ 2023-12-12 21:46 UTC (permalink / raw)
To: qemu-devel
The following changes since commit 9c74490bff6c8886a922008d0c9ce6cae70dd17e:
Update version for v8.2.0-rc3 release (2023-12-06 14:34:20 -0500)
are available in the Git repository at:
https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20231212
for you to fetch changes up to cbb145567c21d2bc41e8197a243c7d5480c6fca0:
tcg: Reduce serial context atomicity earlier (2023-12-12 13:35:19 -0800)
----------------------------------------------------------------
target/i386: Fix 32-bit wrapping of pc/eip computation (#2022)
tcg: Reduce serial context atomicity earlier (#2034)
----------------------------------------------------------------
Richard Henderson (2):
target/i386: Fix 32-bit wrapping of pc/eip computation
tcg: Reduce serial context atomicity earlier
target/i386/cpu.h | 9 +++++++--
target/i386/tcg/tcg-cpu.c | 11 +++++++++--
target/i386/tcg/translate.c | 23 +++++++++++++++++------
tcg/tcg-op-ldst.c | 28 ++++++++++++++++++++++++----
tcg/tcg.c | 9 +--------
5 files changed, 58 insertions(+), 22 deletions(-)
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PULL 1/2] target/i386: Fix 32-bit wrapping of pc/eip computation
2023-12-12 21:46 [PULL 0/2] late fixes for rc4 Richard Henderson
@ 2023-12-12 21:46 ` Richard Henderson
2023-12-12 21:46 ` [PULL 2/2] tcg: Reduce serial context atomicity earlier Richard Henderson
2023-12-13 17:22 ` [PULL 0/2] late fixes for rc4 Stefan Hajnoczi
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2023-12-12 21:46 UTC (permalink / raw)
To: qemu-devel; +Cc: Paolo Bonzini
In 32-bit mode, pc = eip + cs_base is also 32-bit, and must wrap.
Failure to do so results in incorrect memory exceptions to the guest.
Before 732d548732ed, this was implicitly done via truncation to
target_ulong but only in qemu-system-i386, not qemu-system-x86_64.
To fix this, we must add conditional zero-extensions.
Since we have to test for 32 vs 64-bit anyway, note that cs_base
is always zero in 64-bit mode.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2022
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20231212172510.103305-1-richard.henderson@linaro.org>
---
target/i386/cpu.h | 9 +++++++--
target/i386/tcg/tcg-cpu.c | 11 +++++++++--
target/i386/tcg/translate.c | 23 +++++++++++++++++------
3 files changed, 33 insertions(+), 10 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index cd2e295bd6..ef987f344c 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2324,10 +2324,15 @@ static inline int cpu_mmu_index_kernel(CPUX86State *env)
static inline void cpu_get_tb_cpu_state(CPUX86State *env, vaddr *pc,
uint64_t *cs_base, uint32_t *flags)
{
- *cs_base = env->segs[R_CS].base;
- *pc = *cs_base + env->eip;
*flags = env->hflags |
(env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK));
+ if (env->hflags & HF_CS64_MASK) {
+ *cs_base = 0;
+ *pc = env->eip;
+ } else {
+ *cs_base = env->segs[R_CS].base;
+ *pc = (uint32_t)(*cs_base + env->eip);
+ }
}
void do_cpu_init(X86CPU *cpu);
diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c
index 2c6a12c835..6e881e9e27 100644
--- a/target/i386/tcg/tcg-cpu.c
+++ b/target/i386/tcg/tcg-cpu.c
@@ -52,7 +52,12 @@ static void x86_cpu_synchronize_from_tb(CPUState *cs,
/* The instruction pointer is always up to date with CF_PCREL. */
if (!(tb_cflags(tb) & CF_PCREL)) {
CPUX86State *env = cpu_env(cs);
- env->eip = tb->pc - tb->cs_base;
+
+ if (tb->flags & HF_CS64_MASK) {
+ env->eip = tb->pc;
+ } else {
+ env->eip = (uint32_t)(tb->pc - tb->cs_base);
+ }
}
}
@@ -66,8 +71,10 @@ static void x86_restore_state_to_opc(CPUState *cs,
if (tb_cflags(tb) & CF_PCREL) {
env->eip = (env->eip & TARGET_PAGE_MASK) | data[0];
+ } else if (tb->flags & HF_CS64_MASK) {
+ env->eip = data[0];
} else {
- env->eip = data[0] - tb->cs_base;
+ env->eip = (uint32_t)(data[0] - tb->cs_base);
}
if (cc_op != CC_OP_DYNAMIC) {
env->cc_op = cc_op;
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 587d88692a..037bc47e7c 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -552,8 +552,10 @@ static void gen_update_eip_cur(DisasContext *s)
assert(s->pc_save != -1);
if (tb_cflags(s->base.tb) & CF_PCREL) {
tcg_gen_addi_tl(cpu_eip, cpu_eip, s->base.pc_next - s->pc_save);
+ } else if (CODE64(s)) {
+ tcg_gen_movi_tl(cpu_eip, s->base.pc_next);
} else {
- tcg_gen_movi_tl(cpu_eip, s->base.pc_next - s->cs_base);
+ tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->base.pc_next - s->cs_base));
}
s->pc_save = s->base.pc_next;
}
@@ -563,8 +565,10 @@ static void gen_update_eip_next(DisasContext *s)
assert(s->pc_save != -1);
if (tb_cflags(s->base.tb) & CF_PCREL) {
tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save);
+ } else if (CODE64(s)) {
+ tcg_gen_movi_tl(cpu_eip, s->pc);
} else {
- tcg_gen_movi_tl(cpu_eip, s->pc - s->cs_base);
+ tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->pc - s->cs_base));
}
s->pc_save = s->pc;
}
@@ -610,8 +614,10 @@ static TCGv eip_next_tl(DisasContext *s)
TCGv ret = tcg_temp_new();
tcg_gen_addi_tl(ret, cpu_eip, s->pc - s->pc_save);
return ret;
+ } else if (CODE64(s)) {
+ return tcg_constant_tl(s->pc);
} else {
- return tcg_constant_tl(s->pc - s->cs_base);
+ return tcg_constant_tl((uint32_t)(s->pc - s->cs_base));
}
}
@@ -622,8 +628,10 @@ static TCGv eip_cur_tl(DisasContext *s)
TCGv ret = tcg_temp_new();
tcg_gen_addi_tl(ret, cpu_eip, s->base.pc_next - s->pc_save);
return ret;
+ } else if (CODE64(s)) {
+ return tcg_constant_tl(s->base.pc_next);
} else {
- return tcg_constant_tl(s->base.pc_next - s->cs_base);
+ return tcg_constant_tl((uint32_t)(s->base.pc_next - s->cs_base));
}
}
@@ -2837,6 +2845,10 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
}
}
new_eip &= mask;
+ new_pc = new_eip + s->cs_base;
+ if (!CODE64(s)) {
+ new_pc = (uint32_t)new_pc;
+ }
gen_update_cc_op(s);
set_cc_op(s, CC_OP_DYNAMIC);
@@ -2854,8 +2866,7 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num)
}
}
- if (use_goto_tb &&
- translator_use_goto_tb(&s->base, new_eip + s->cs_base)) {
+ if (use_goto_tb && translator_use_goto_tb(&s->base, new_pc)) {
/* jump to same page: we can use a direct jump */
tcg_gen_goto_tb(tb_num);
if (!(tb_cflags(s->base.tb) & CF_PCREL)) {
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PULL 2/2] tcg: Reduce serial context atomicity earlier
2023-12-12 21:46 [PULL 0/2] late fixes for rc4 Richard Henderson
2023-12-12 21:46 ` [PULL 1/2] target/i386: Fix 32-bit wrapping of pc/eip computation Richard Henderson
@ 2023-12-12 21:46 ` Richard Henderson
2023-12-13 17:22 ` [PULL 0/2] late fixes for rc4 Stefan Hajnoczi
2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2023-12-12 21:46 UTC (permalink / raw)
To: qemu-devel; +Cc: Alex Bennée
Reduce atomicity while emitting opcodes, instead of later
during code generation. This ensures that any helper called
also sees the reduced atomicity requirement.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2034
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20231212193542.149117-1-richard.henderson@linaro.org>
---
tcg/tcg-op-ldst.c | 28 ++++++++++++++++++++++++----
tcg/tcg.c | 9 +--------
2 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index e2c55df217..f11043b449 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -77,6 +77,13 @@ static MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
if (st) {
op &= ~MO_SIGN;
}
+
+ /* In serial mode, reduce atomicity. */
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
+ op &= ~MO_ATOM_MASK;
+ op |= MO_ATOM_NONE;
+ }
+
return op;
}
@@ -428,8 +435,7 @@ static bool use_two_i64_for_i128(MemOp mop)
case MO_ATOM_SUBALIGN:
case MO_ATOM_WITHIN16:
case MO_ATOM_WITHIN16_PAIR:
- /* In a serialized context, no atomicity is required. */
- return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
+ return false;
default:
g_assert_not_reached();
}
@@ -499,13 +505,20 @@ static void maybe_free_addr64(TCGv_i64 a64)
static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
TCGArg idx, MemOp memop)
{
- const MemOpIdx orig_oi = make_memop_idx(memop, idx);
+ MemOpIdx orig_oi;
TCGv_i64 ext_addr = NULL;
TCGOpcode opc;
check_max_alignment(get_alignment_bits(memop));
tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ /* In serial mode, reduce atomicity. */
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
+ memop &= ~MO_ATOM_MASK;
+ memop |= MO_ATOM_NONE;
+ }
+ orig_oi = make_memop_idx(memop, idx);
+
/* TODO: For now, force 32-bit hosts to use the helper. */
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
TCGv_i64 lo, hi;
@@ -608,13 +621,20 @@ void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
TCGArg idx, MemOp memop)
{
- const MemOpIdx orig_oi = make_memop_idx(memop, idx);
+ MemOpIdx orig_oi;
TCGv_i64 ext_addr = NULL;
TCGOpcode opc;
check_max_alignment(get_alignment_bits(memop));
tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
+ /* In serial mode, reduce atomicity. */
+ if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
+ memop &= ~MO_ATOM_MASK;
+ memop |= MO_ATOM_NONE;
+ }
+ orig_oi = make_memop_idx(memop, idx);
+
/* TODO: For now, force 32-bit hosts to use the helper. */
if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
diff --git a/tcg/tcg.c b/tcg/tcg.c
index d2ea22b397..896a36caeb 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -5440,15 +5440,8 @@ static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
MemOp align = get_alignment_bits(opc);
MemOp size = opc & MO_SIZE;
MemOp half = size ? size - 1 : 0;
+ MemOp atom = opc & MO_ATOM_MASK;
MemOp atmax;
- MemOp atom;
-
- /* When serialized, no further atomicity required. */
- if (s->gen_tb->cflags & CF_PARALLEL) {
- atom = opc & MO_ATOM_MASK;
- } else {
- atom = MO_ATOM_NONE;
- }
switch (atom) {
case MO_ATOM_NONE:
--
2.34.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PULL 0/2] late fixes for rc4
2023-12-12 21:46 [PULL 0/2] late fixes for rc4 Richard Henderson
2023-12-12 21:46 ` [PULL 1/2] target/i386: Fix 32-bit wrapping of pc/eip computation Richard Henderson
2023-12-12 21:46 ` [PULL 2/2] tcg: Reduce serial context atomicity earlier Richard Henderson
@ 2023-12-13 17:22 ` Stefan Hajnoczi
2 siblings, 0 replies; 4+ messages in thread
From: Stefan Hajnoczi @ 2023-12-13 17:22 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
[-- Attachment #1: Type: text/plain, Size: 115 bytes --]
Applied, thanks.
Please update the changelog at https://wiki.qemu.org/ChangeLog/8.2 for any user-visible changes.
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2023-12-13 17:23 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz; follow: Atom feed)
-- links below jump to the message on this page --
2023-12-12 21:46 [PULL 0/2] late fixes for rc4 Richard Henderson
2023-12-12 21:46 ` [PULL 1/2] target/i386: Fix 32-bit wrapping of pc/eip computation Richard Henderson
2023-12-12 21:46 ` [PULL 2/2] tcg: Reduce serial context atomicity earlier Richard Henderson
2023-12-13 17:22 ` [PULL 0/2] late fixes for rc4 Stefan Hajnoczi
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.