* [PATCH v2 01/28] target/i386: Add tcg/access.[ch]
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 7:09 ` Paolo Bonzini
2024-04-09 5:02 ` [PATCH v2 02/28] target/i386: Convert do_fldt, do_fstt to X86Access Richard Henderson
` (27 subsequent siblings)
28 siblings, 1 reply; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Provide a method to amortize page lookup across large blocks.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/access.h | 40 +++++++++
target/i386/tcg/access.c | 160 ++++++++++++++++++++++++++++++++++++
target/i386/tcg/meson.build | 1 +
3 files changed, 201 insertions(+)
create mode 100644 target/i386/tcg/access.h
create mode 100644 target/i386/tcg/access.c
diff --git a/target/i386/tcg/access.h b/target/i386/tcg/access.h
new file mode 100644
index 0000000000..d70808a3a3
--- /dev/null
+++ b/target/i386/tcg/access.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Access guest memory in blocks. */
+
+#ifndef X86_TCG_ACCESS_H
+#define X86_TCG_ACCESS_H
+
+/* An access covers at most sizeof(X86XSaveArea), at most 2 pages. */
+typedef struct X86Access {
+ target_ulong vaddr;
+ void *haddr1;
+ void *haddr2;
+ uint16_t size;
+ uint16_t size1;
+ /*
+ * If we can't access the host page directly, we'll have to do I/O access
+ * via ld/st helpers. These are internal details, so we store the rest
+ * to do the access here instead of passing it around in the helpers.
+ */
+ int mmu_idx;
+ CPUX86State *env;
+ uintptr_t ra;
+} X86Access;
+
+void access_prepare_mmu(X86Access *ret, CPUX86State *env,
+ vaddr vaddr, unsigned size,
+ MMUAccessType type, int mmu_idx, uintptr_t ra);
+void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr,
+ unsigned size, MMUAccessType type, uintptr_t ra);
+
+uint8_t access_ldb(X86Access *ac, vaddr addr);
+uint16_t access_ldw(X86Access *ac, vaddr addr);
+uint32_t access_ldl(X86Access *ac, vaddr addr);
+uint64_t access_ldq(X86Access *ac, vaddr addr);
+
+void access_stb(X86Access *ac, vaddr addr, uint8_t val);
+void access_stw(X86Access *ac, vaddr addr, uint16_t val);
+void access_stl(X86Access *ac, vaddr addr, uint32_t val);
+void access_stq(X86Access *ac, vaddr addr, uint64_t val);
+
+#endif
diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c
new file mode 100644
index 0000000000..8b70f3244b
--- /dev/null
+++ b/target/i386/tcg/access.c
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Access guest memory in blocks. */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "exec/cpu_ldst.h"
+#include "exec/exec-all.h"
+#include "access.h"
+
+
+void access_prepare_mmu(X86Access *ret, CPUX86State *env,
+ vaddr vaddr, unsigned size,
+ MMUAccessType type, int mmu_idx, uintptr_t ra)
+{
+ int size1, size2;
+ void *haddr1, *haddr2;
+
+ assert(size > 0 && size <= TARGET_PAGE_SIZE);
+
+ size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)),
+ size2 = size - size1;
+
+ memset(ret, 0, sizeof(*ret));
+ ret->vaddr = vaddr;
+ ret->size = size;
+ ret->size1 = size1;
+ ret->mmu_idx = mmu_idx;
+ ret->env = env;
+ ret->ra = ra;
+
+ haddr1 = probe_access(env, vaddr, size1, type, mmu_idx, ra);
+ ret->haddr1 = haddr1;
+
+ if (unlikely(size2)) {
+ haddr2 = probe_access(env, vaddr + size1, size2, type, mmu_idx, ra);
+ if (haddr2 == haddr1 + size1) {
+ ret->size1 = size;
+ } else {
+ ret->haddr2 = haddr2;
+ }
+ }
+}
+
+void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr,
+ unsigned size, MMUAccessType type, uintptr_t ra)
+{
+ int mmu_idx = cpu_mmu_index(env_cpu(env), false);
+ access_prepare_mmu(ret, env, vaddr, size, type, mmu_idx, ra);
+}
+
+static void *access_ptr(X86Access *ac, vaddr addr, unsigned len)
+{
+ vaddr offset = addr - ac->vaddr;
+
+ assert(addr >= ac->vaddr);
+
+#ifdef CONFIG_USER_ONLY
+ assert(offset <= ac->size1 - len);
+ return ac->haddr1 + offset;
+#else
+ if (likely(offset <= ac->size1 - len)) {
+ return ac->haddr1;
+ }
+ assert(offset <= ac->size - len);
+ if (likely(offset >= ac->size1)) {
+ return ac->haddr2;
+ }
+ return NULL;
+#endif
+}
+
+#ifdef CONFIG_USER_ONLY
+# define test_ptr(p) true
+#else
+# define test_ptr(p) likely(p)
+#endif
+
+uint8_t access_ldb(X86Access *ac, vaddr addr)
+{
+ void *p = access_ptr(ac, addr, sizeof(uint8_t));
+
+ if (test_ptr(p)) {
+ return ldub_p(p);
+ }
+ return cpu_ldub_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
+}
+
+uint16_t access_ldw(X86Access *ac, vaddr addr)
+{
+ void *p = access_ptr(ac, addr, sizeof(uint16_t));
+
+ if (test_ptr(p)) {
+ return lduw_le_p(p);
+ }
+ return cpu_lduw_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
+}
+
+uint32_t access_ldl(X86Access *ac, vaddr addr)
+{
+ void *p = access_ptr(ac, addr, sizeof(uint32_t));
+
+ if (test_ptr(p)) {
+ return ldl_le_p(p);
+ }
+ return cpu_ldl_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
+}
+
+uint64_t access_ldq(X86Access *ac, vaddr addr)
+{
+ void *p = access_ptr(ac, addr, sizeof(uint64_t));
+
+ if (test_ptr(p)) {
+ return ldq_le_p(p);
+ }
+ return cpu_ldq_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
+}
+
+void access_stb(X86Access *ac, vaddr addr, uint8_t val)
+{
+ void *p = access_ptr(ac, addr, sizeof(uint8_t));
+
+ if (test_ptr(p)) {
+ stb_p(p, val);
+ } else {
+ cpu_stb_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
+ }
+}
+
+void access_stw(X86Access *ac, vaddr addr, uint16_t val)
+{
+ void *p = access_ptr(ac, addr, sizeof(uint16_t));
+
+ if (test_ptr(p)) {
+ stw_le_p(p, val);
+ } else {
+ cpu_stw_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
+ }
+}
+
+void access_stl(X86Access *ac, vaddr addr, uint32_t val)
+{
+ void *p = access_ptr(ac, addr, sizeof(uint32_t));
+
+ if (test_ptr(p)) {
+ stl_le_p(p, val);
+ } else {
+ cpu_stl_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
+ }
+}
+
+void access_stq(X86Access *ac, vaddr addr, uint64_t val)
+{
+ void *p = access_ptr(ac, addr, sizeof(uint64_t));
+
+ if (test_ptr(p)) {
+ stq_le_p(p, val);
+ } else {
+ cpu_stq_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
+ }
+}
diff --git a/target/i386/tcg/meson.build b/target/i386/tcg/meson.build
index f9110e890c..1105b35d92 100644
--- a/target/i386/tcg/meson.build
+++ b/target/i386/tcg/meson.build
@@ -1,4 +1,5 @@
i386_ss.add(when: 'CONFIG_TCG', if_true: files(
+ 'access.c',
'bpt_helper.c',
'cc_helper.c',
'excp_helper.c',
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH v2 01/28] target/i386: Add tcg/access.[ch]
2024-04-09 5:02 ` [PATCH v2 01/28] target/i386: Add tcg/access.[ch] Richard Henderson
@ 2024-04-09 7:09 ` Paolo Bonzini
0 siblings, 0 replies; 36+ messages in thread
From: Paolo Bonzini @ 2024-04-09 7:09 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 4/9/24 07:02, Richard Henderson wrote:
> Provide a method to amortize page lookup across large blocks.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/i386/tcg/access.h | 40 +++++++++
> target/i386/tcg/access.c | 160 ++++++++++++++++++++++++++++++++++++
> target/i386/tcg/meson.build | 1 +
> 3 files changed, 201 insertions(+)
> create mode 100644 target/i386/tcg/access.h
> create mode 100644 target/i386/tcg/access.c
>
> diff --git a/target/i386/tcg/access.h b/target/i386/tcg/access.h
> new file mode 100644
> index 0000000000..d70808a3a3
> --- /dev/null
> +++ b/target/i386/tcg/access.h
> @@ -0,0 +1,40 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/* Access guest memory in blocks. */
> +
> +#ifndef X86_TCG_ACCESS_H
> +#define X86_TCG_ACCESS_H
> +
> +/* An access covers at most sizeof(X86XSaveArea), at most 2 pages. */
> +typedef struct X86Access {
> + target_ulong vaddr;
> + void *haddr1;
> + void *haddr2;
> + uint16_t size;
> + uint16_t size1;
> + /*
> + * If we can't access the host page directly, we'll have to do I/O access
> + * via ld/st helpers. These are internal details, so we store the rest
> + * to do the access here instead of passing it around in the helpers.
> + */
> + int mmu_idx;
> + CPUX86State *env;
> + uintptr_t ra;
> +} X86Access;
> +
> +void access_prepare_mmu(X86Access *ret, CPUX86State *env,
> + vaddr vaddr, unsigned size,
> + MMUAccessType type, int mmu_idx, uintptr_t ra);
> +void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr,
> + unsigned size, MMUAccessType type, uintptr_t ra);
> +
> +uint8_t access_ldb(X86Access *ac, vaddr addr);
> +uint16_t access_ldw(X86Access *ac, vaddr addr);
> +uint32_t access_ldl(X86Access *ac, vaddr addr);
> +uint64_t access_ldq(X86Access *ac, vaddr addr);
> +
> +void access_stb(X86Access *ac, vaddr addr, uint8_t val);
> +void access_stw(X86Access *ac, vaddr addr, uint16_t val);
> +void access_stl(X86Access *ac, vaddr addr, uint32_t val);
> +void access_stq(X86Access *ac, vaddr addr, uint64_t val);
> +
> +#endif
> diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c
> new file mode 100644
> index 0000000000..8b70f3244b
> --- /dev/null
> +++ b/target/i386/tcg/access.c
> @@ -0,0 +1,160 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/* Access guest memory in blocks. */
> +
> +#include "qemu/osdep.h"
> +#include "cpu.h"
> +#include "exec/cpu_ldst.h"
> +#include "exec/exec-all.h"
> +#include "access.h"
> +
> +
> +void access_prepare_mmu(X86Access *ret, CPUX86State *env,
> + vaddr vaddr, unsigned size,
> + MMUAccessType type, int mmu_idx, uintptr_t ra)
> +{
> + int size1, size2;
> + void *haddr1, *haddr2;
> +
> + assert(size > 0 && size <= TARGET_PAGE_SIZE);
> +
> + size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)),
> + size2 = size - size1;
> +
> + memset(ret, 0, sizeof(*ret));
> + ret->vaddr = vaddr;
> + ret->size = size;
> + ret->size1 = size1;
> + ret->mmu_idx = mmu_idx;
> + ret->env = env;
> + ret->ra = ra;
> +
> + haddr1 = probe_access(env, vaddr, size1, type, mmu_idx, ra);
> + ret->haddr1 = haddr1;
> +
> + if (unlikely(size2)) {
> + haddr2 = probe_access(env, vaddr + size1, size2, type, mmu_idx, ra);
> + if (haddr2 == haddr1 + size1) {
> + ret->size1 = size;
> + } else {
> + ret->haddr2 = haddr2;
> + }
> + }
Should there be an assert(!ret->haddr2) here for the CONFIG_USER_ONLY
case, or alternatively a g_assert_unreachable() in the "else" above?
> +}
> +
> +void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr,
> + unsigned size, MMUAccessType type, uintptr_t ra)
> +{
> + int mmu_idx = cpu_mmu_index(env_cpu(env), false);
> + access_prepare_mmu(ret, env, vaddr, size, type, mmu_idx, ra);
> +}
> +
> +static void *access_ptr(X86Access *ac, vaddr addr, unsigned len)
> +{
> + vaddr offset = addr - ac->vaddr;
> +
> + assert(addr >= ac->vaddr);
> +
> +#ifdef CONFIG_USER_ONLY
> + assert(offset <= ac->size1 - len);
> + return ac->haddr1 + offset;
> +#else
> + if (likely(offset <= ac->size1 - len)) {
> + return ac->haddr1;
> + }
> + assert(offset <= ac->size - len);
> + if (likely(offset >= ac->size1)) {
> + return ac->haddr2;
> + }
I think the returns should be (respectively) ac->haddr1 + offset and
ac->haddr2 + (offset - ac->size1)?
Also I would add a comment above the second "if", like
/*
* If the address is not naturally aligned, it might span
* both pages. Only return ac->haddr2 if the area is
* entirely within the second page, otherwise fall back
* to slow accesses.
*/
Paolo
> +uint8_t access_ldb(X86Access *ac, vaddr addr)
> +{
> + void *p = access_ptr(ac, addr, sizeof(uint8_t));
> +
> + if (test_ptr(p)) {
> + return ldub_p(p);
> + }
> + return cpu_ldub_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
> +}
> +
> +uint16_t access_ldw(X86Access *ac, vaddr addr)
> +{
> + void *p = access_ptr(ac, addr, sizeof(uint16_t));
> +
> + if (test_ptr(p)) {
> + return lduw_le_p(p);
> + }
> + return cpu_lduw_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
> +}
> +
> +uint32_t access_ldl(X86Access *ac, vaddr addr)
> +{
> + void *p = access_ptr(ac, addr, sizeof(uint32_t));
> +
> + if (test_ptr(p)) {
> + return ldl_le_p(p);
> + }
> + return cpu_ldl_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
> +}
> +
> +uint64_t access_ldq(X86Access *ac, vaddr addr)
> +{
> + void *p = access_ptr(ac, addr, sizeof(uint64_t));
> +
> + if (test_ptr(p)) {
> + return ldq_le_p(p);
> + }
> + return cpu_ldq_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra);
> +}
> +
> +void access_stb(X86Access *ac, vaddr addr, uint8_t val)
> +{
> + void *p = access_ptr(ac, addr, sizeof(uint8_t));
> +
> + if (test_ptr(p)) {
> + stb_p(p, val);
> + } else {
> + cpu_stb_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
> + }
> +}
> +
> +void access_stw(X86Access *ac, vaddr addr, uint16_t val)
> +{
> + void *p = access_ptr(ac, addr, sizeof(uint16_t));
> +
> + if (test_ptr(p)) {
> + stw_le_p(p, val);
> + } else {
> + cpu_stw_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
> + }
> +}
> +
> +void access_stl(X86Access *ac, vaddr addr, uint32_t val)
> +{
> + void *p = access_ptr(ac, addr, sizeof(uint32_t));
> +
> + if (test_ptr(p)) {
> + stl_le_p(p, val);
> + } else {
> + cpu_stl_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
> + }
> +}
> +
> +void access_stq(X86Access *ac, vaddr addr, uint64_t val)
> +{
> + void *p = access_ptr(ac, addr, sizeof(uint64_t));
> +
> + if (test_ptr(p)) {
> + stq_le_p(p, val);
> + } else {
> + cpu_stq_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra);
> + }
> +}
> diff --git a/target/i386/tcg/meson.build b/target/i386/tcg/meson.build
> index f9110e890c..1105b35d92 100644
> --- a/target/i386/tcg/meson.build
> +++ b/target/i386/tcg/meson.build
> @@ -1,4 +1,5 @@
> i386_ss.add(when: 'CONFIG_TCG', if_true: files(
> + 'access.c',
> 'bpt_helper.c',
> 'cc_helper.c',
> 'excp_helper.c',
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 02/28] target/i386: Convert do_fldt, do_fstt to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
2024-04-09 5:02 ` [PATCH v2 01/28] target/i386: Add tcg/access.[ch] Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 7:52 ` Paolo Bonzini
2024-04-09 5:02 ` [PATCH v2 03/28] target/i386: Convert helper_{fbld, fbst}_ST0 " Richard Henderson
` (26 subsequent siblings)
28 siblings, 1 reply; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 44 +++++++++++++++++++++++++-----------
1 file changed, 31 insertions(+), 13 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 4b965a5d6c..878fad9795 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -26,6 +26,7 @@
#include "fpu/softfloat.h"
#include "fpu/softfloat-macros.h"
#include "helper-tcg.h"
+#include "access.h"
/* float macros */
#define FT0 (env->ft0)
@@ -83,23 +84,22 @@ static inline void fpop(CPUX86State *env)
env->fpstt = (env->fpstt + 1) & 7;
}
-static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr)
+static floatx80 do_fldt(X86Access *ac, target_ulong ptr)
{
CPU_LDoubleU temp;
- temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
- temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
+ temp.l.lower = access_ldq(ac, ptr);
+ temp.l.upper = access_ldw(ac, ptr + 8);
return temp.d;
}
-static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
- uintptr_t retaddr)
+static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f)
{
CPU_LDoubleU temp;
temp.d = f;
- cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
- cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
+ access_stq(ac, ptr, temp.l.lower);
+ access_stw(ac, ptr + 8, temp.l.upper);
}
/* x87 FPU helpers */
@@ -381,16 +381,22 @@ int64_t helper_fisttll_ST0(CPUX86State *env)
void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
{
int new_fpstt;
+ X86Access ac;
+
+ access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());
new_fpstt = (env->fpstt - 1) & 7;
- env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC());
+ env->fpregs[new_fpstt].d = do_fldt(&ac, ptr);
env->fpstt = new_fpstt;
env->fptags[new_fpstt] = 0; /* validate stack entry */
}
void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
{
- do_fstt(env, ST0, ptr, GETPC());
+ X86Access ac;
+
+ access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
+ do_fstt(&ac, ptr, ST0);
}
void helper_fpush(CPUX86State *env)
@@ -2459,15 +2465,18 @@ void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
uintptr_t retaddr)
{
+ X86Access ac;
floatx80 tmp;
int i;
do_fstenv(env, ptr, data32, retaddr);
ptr += (target_ulong)14 << data32;
+ access_prepare(&ac, env, ptr, 80, MMU_DATA_STORE, GETPC());
+
for (i = 0; i < 8; i++) {
tmp = ST(i);
- do_fstt(env, tmp, ptr, retaddr);
+ do_fstt(&ac, ptr, tmp);
ptr += 10;
}
@@ -2482,14 +2491,17 @@ void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
uintptr_t retaddr)
{
+ X86Access ac;
floatx80 tmp;
int i;
do_fldenv(env, ptr, data32, retaddr);
ptr += (target_ulong)14 << data32;
+ access_prepare(&ac, env, ptr, 80, MMU_DATA_LOAD, retaddr);
+
for (i = 0; i < 8; i++) {
- tmp = do_fldt(env, ptr, retaddr);
+ tmp = do_fldt(&ac, ptr);
ST(i) = tmp;
ptr += 10;
}
@@ -2506,6 +2518,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
int fpus, fptag, i;
target_ulong addr;
+ X86Access ac;
fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
fptag = 0;
@@ -2524,9 +2537,11 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
addr = ptr + XO(legacy.fpregs);
+ access_prepare(&ac, env, addr, 8 * 16, MMU_DATA_STORE, GETPC());
+
for (i = 0; i < 8; i++) {
floatx80 tmp = ST(i);
- do_fstt(env, tmp, addr, ra);
+ do_fstt(&ac, addr, tmp);
addr += 16;
}
}
@@ -2699,6 +2714,7 @@ static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
int i, fpuc, fpus, fptag;
target_ulong addr;
+ X86Access ac;
fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
@@ -2711,8 +2727,10 @@ static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
}
addr = ptr + XO(legacy.fpregs);
+ access_prepare(&ac, env, addr, 8 * 16, MMU_DATA_LOAD, GETPC());
+
for (i = 0; i < 8; i++) {
- floatx80 tmp = do_fldt(env, addr, ra);
+ floatx80 tmp = do_fldt(&ac, addr);
ST(i) = tmp;
addr += 16;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH v2 02/28] target/i386: Convert do_fldt, do_fstt to X86Access
2024-04-09 5:02 ` [PATCH v2 02/28] target/i386: Convert do_fldt, do_fstt to X86Access Richard Henderson
@ 2024-04-09 7:52 ` Paolo Bonzini
0 siblings, 0 replies; 36+ messages in thread
From: Paolo Bonzini @ 2024-04-09 7:52 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 4/9/24 07:02, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/i386/tcg/fpu_helper.c | 44 +++++++++++++++++++++++++-----------
> 1 file changed, 31 insertions(+), 13 deletions(-)
Three incorrect GETPC()s that get fixed later in the series:
do_fsave:
> @@ -2459,15 +2465,18 @@ void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
> static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
> uintptr_t retaddr)
> {
> + X86Access ac;
> floatx80 tmp;
> int i;
>
> do_fstenv(env, ptr, data32, retaddr);
>
> ptr += (target_ulong)14 << data32;
> + access_prepare(&ac, env, ptr, 80, MMU_DATA_STORE, GETPC());
> +
do_xsave_fpu:
> @@ -2506,6 +2518,7 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
> {
> int fpus, fptag, i;
> target_ulong addr;
> + X86Access ac;
>
> fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
> fptag = 0;
> @@ -2524,9 +2537,11 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
> cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
>
> addr = ptr + XO(legacy.fpregs);
> + access_prepare(&ac, env, addr, 8 * 16, MMU_DATA_STORE, GETPC());
> +
> for (i = 0; i < 8; i++) {
> floatx80 tmp = ST(i);
> - do_fstt(env, tmp, addr, ra);
> + do_fstt(&ac, addr, tmp);
> addr += 16;
> }
> }
do_xrstor_fpu:
> @@ -2699,6 +2714,7 @@ static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
> {
> int i, fpuc, fpus, fptag;
> target_ulong addr;
> + X86Access ac;
>
> fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
> fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
> @@ -2711,8 +2727,10 @@ static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
> }
>
> addr = ptr + XO(legacy.fpregs);
> + access_prepare(&ac, env, addr, 8 * 16, MMU_DATA_LOAD, GETPC());
> +
> for (i = 0; i < 8; i++) {
> - floatx80 tmp = do_fldt(env, addr, ra);
> + floatx80 tmp = do_fldt(&ac, addr);
> ST(i) = tmp;
> addr += 16;
> }
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 03/28] target/i386: Convert helper_{fbld, fbst}_ST0 to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
2024-04-09 5:02 ` [PATCH v2 01/28] target/i386: Add tcg/access.[ch] Richard Henderson
2024-04-09 5:02 ` [PATCH v2 02/28] target/i386: Convert do_fldt, do_fstt to X86Access Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 04/28] target/i386: Convert do_fldenv " Richard Henderson
` (25 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 25 +++++++++++++++----------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 878fad9795..ad8b536cb5 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -772,18 +772,21 @@ void helper_fninit(CPUX86State *env)
void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
{
+ X86Access ac;
floatx80 tmp;
uint64_t val;
unsigned int v;
int i;
+ access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC());
+
val = 0;
for (i = 8; i >= 0; i--) {
- v = cpu_ldub_data_ra(env, ptr + i, GETPC());
+ v = access_ldb(&ac, ptr + i);
val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
}
tmp = int64_to_floatx80(val, &env->fp_status);
- if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
+ if (access_ldb(&ac, ptr + 9) & 0x80) {
tmp = floatx80_chs(tmp);
}
fpush(env);
@@ -797,7 +800,9 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
target_ulong mem_ref, mem_end;
int64_t val;
CPU_LDoubleU temp;
+ X86Access ac;
+ access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC());
temp.d = ST0;
val = floatx80_to_int64(ST0, &env->fp_status);
@@ -805,20 +810,20 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) {
set_float_exception_flags(float_flag_invalid, &env->fp_status);
while (mem_ref < ptr + 7) {
- cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
+ access_stb(&ac, mem_ref++, 0);
}
- cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC());
- cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
- cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC());
+ access_stb(&ac, mem_ref++, 0xc0);
+ access_stb(&ac, mem_ref++, 0xff);
+ access_stb(&ac, mem_ref++, 0xff);
merge_exception_flags(env, old_flags);
return;
}
mem_end = mem_ref + 9;
if (SIGND(temp)) {
- cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
+ access_stb(&ac, mem_end, 0x80);
val = -val;
} else {
- cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
+ access_stb(&ac, mem_end, 0x00);
}
while (mem_ref < mem_end) {
if (val == 0) {
@@ -827,10 +832,10 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
v = val % 100;
val = val / 100;
v = ((v / 10) << 4) | (v % 10);
- cpu_stb_data_ra(env, mem_ref++, v, GETPC());
+ access_stb(&ac, mem_ref++, v);
}
while (mem_ref < mem_end) {
- cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
+ access_stb(&ac, mem_ref++, 0);
}
merge_exception_flags(env, old_flags);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 04/28] target/i386: Convert do_fldenv to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (2 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 03/28] target/i386: Convert helper_{fbld, fbst}_ST0 " Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 05/28] target/i386: Convert do_fstenv " Richard Henderson
` (24 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 30 ++++++++++++++----------------
1 file changed, 14 insertions(+), 16 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index ad8b536cb5..28ae8100f6 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2441,20 +2441,15 @@ static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
#endif
}
-static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
- uintptr_t retaddr)
+static void do_fldenv(X86Access *ac, target_ulong ptr, int data32)
{
int i, fpus, fptag;
+ CPUX86State *env = ac->env;
+
+ cpu_set_fpuc(env, access_ldw(ac, ptr));
+ fpus = access_ldw(ac, ptr + (2 << data32));
+ fptag = access_ldw(ac, ptr + (4 << data32));
- if (data32) {
- cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
- fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
- fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
- } else {
- cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
- fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
- fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
- }
cpu_set_fpus(env, fpus);
for (i = 0; i < 8; i++) {
env->fptags[i] = ((fptag & 3) == 3);
@@ -2464,7 +2459,10 @@ static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
{
- do_fldenv(env, ptr, data32, GETPC());
+ X86Access ac;
+
+ access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
+ do_fldenv(&ac, ptr, data32);
}
static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
@@ -2498,12 +2496,12 @@ static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
{
X86Access ac;
floatx80 tmp;
- int i;
+ int i, envsize = 14 << data32;
- do_fldenv(env, ptr, data32, retaddr);
- ptr += (target_ulong)14 << data32;
+ access_prepare(&ac, env, ptr, envsize + 80, MMU_DATA_LOAD, retaddr);
- access_prepare(&ac, env, ptr, 80, MMU_DATA_LOAD, retaddr);
+ do_fldenv(&ac, ptr, data32);
+ ptr += envsize;
for (i = 0; i < 8; i++) {
tmp = do_fldt(&ac, ptr);
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 05/28] target/i386: Convert do_fstenv to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (3 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 04/28] target/i386: Convert do_fldenv " Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 06/28] target/i386: Convert do_fsave, do_frstor " Richard Henderson
` (23 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 45 +++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 21 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 28ae8100f6..25074af0ce 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2372,9 +2372,9 @@ void helper_fxam_ST0(CPUX86State *env)
}
}
-static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
- uintptr_t retaddr)
+static void do_fstenv(X86Access *ac, target_ulong ptr, int data32)
{
+ CPUX86State *env = ac->env;
int fpus, fptag, exp, i;
uint64_t mant;
CPU_LDoubleU tmp;
@@ -2401,28 +2401,31 @@ static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
}
if (data32) {
/* 32 bit */
- cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
- cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
- cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
- cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */
- cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */
- cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */
- cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */
+ access_stl(ac, ptr, env->fpuc);
+ access_stl(ac, ptr + 4, fpus);
+ access_stl(ac, ptr + 8, fptag);
+ access_stl(ac, ptr + 12, env->fpip); /* fpip */
+ access_stl(ac, ptr + 16, env->fpcs); /* fpcs */
+ access_stl(ac, ptr + 20, env->fpdp); /* fpoo */
+ access_stl(ac, ptr + 24, env->fpds); /* fpos */
} else {
/* 16 bit */
- cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
- cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
- cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
- cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr);
- cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr);
- cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr);
- cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr);
+ access_stw(ac, ptr, env->fpuc);
+ access_stw(ac, ptr + 2, fpus);
+ access_stw(ac, ptr + 4, fptag);
+ access_stw(ac, ptr + 6, env->fpip);
+ access_stw(ac, ptr + 8, env->fpcs);
+ access_stw(ac, ptr + 10, env->fpdp);
+ access_stw(ac, ptr + 12, env->fpds);
}
}
void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
{
- do_fstenv(env, ptr, data32, GETPC());
+ X86Access ac;
+
+ access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC());
+ do_fstenv(&ac, ptr, data32);
}
static void cpu_set_fpus(CPUX86State *env, uint16_t fpus)
@@ -2470,12 +2473,12 @@ static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
{
X86Access ac;
floatx80 tmp;
- int i;
+ int i, envsize = 14 << data32;
- do_fstenv(env, ptr, data32, retaddr);
+ access_prepare(&ac, env, ptr, envsize + 80, MMU_DATA_STORE, GETPC());
- ptr += (target_ulong)14 << data32;
- access_prepare(&ac, env, ptr, 80, MMU_DATA_STORE, GETPC());
+ do_fstenv(&ac, ptr, data32);
+ ptr += envsize;
for (i = 0; i < 8; i++) {
tmp = ST(i);
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 06/28] target/i386: Convert do_fsave, do_frstor to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (4 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 05/28] target/i386: Convert do_fstenv " Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 07/28] target/i386: Convert do_xsave_{fpu, mxcr, sse} " Richard Henderson
` (22 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 60 ++++++++++++++++++++----------------
1 file changed, 33 insertions(+), 27 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 25074af0ce..e6fa161aa0 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2468,21 +2468,16 @@ void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
do_fldenv(&ac, ptr, data32);
}
-static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
- uintptr_t retaddr)
+static void do_fsave(X86Access *ac, target_ulong ptr, int data32)
{
- X86Access ac;
- floatx80 tmp;
- int i, envsize = 14 << data32;
+ CPUX86State *env = ac->env;
- access_prepare(&ac, env, ptr, envsize + 80, MMU_DATA_STORE, GETPC());
+ do_fstenv(ac, ptr, data32);
+ ptr += 14 << data32;
- do_fstenv(&ac, ptr, data32);
- ptr += envsize;
-
- for (i = 0; i < 8; i++) {
- tmp = ST(i);
- do_fstt(&ac, ptr, tmp);
+ for (int i = 0; i < 8; i++) {
+ floatx80 tmp = ST(i);
+ do_fstt(ac, ptr, tmp);
ptr += 10;
}
@@ -2491,23 +2486,22 @@ static void do_fsave(CPUX86State *env, target_ulong ptr, int data32,
void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
- do_fsave(env, ptr, data32, GETPC());
+ int size = (14 << data32) + 80;
+ X86Access ac;
+
+ access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC());
+ do_fsave(&ac, ptr, data32);
}
-static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
- uintptr_t retaddr)
+static void do_frstor(X86Access *ac, target_ulong ptr, int data32)
{
- X86Access ac;
- floatx80 tmp;
- int i, envsize = 14 << data32;
+ CPUX86State *env = ac->env;
- access_prepare(&ac, env, ptr, envsize + 80, MMU_DATA_LOAD, retaddr);
+ do_fldenv(ac, ptr, data32);
+ ptr += 14 << data32;
- do_fldenv(&ac, ptr, data32);
- ptr += envsize;
-
- for (i = 0; i < 8; i++) {
- tmp = do_fldt(&ac, ptr);
+ for (int i = 0; i < 8; i++) {
+ floatx80 tmp = do_fldt(ac, ptr);
ST(i) = tmp;
ptr += 10;
}
@@ -2515,7 +2509,11 @@ static void do_frstor(CPUX86State *env, target_ulong ptr, int data32,
void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
- do_frstor(env, ptr, data32, GETPC());
+ int size = (14 << data32) + 80;
+ X86Access ac;
+
+ access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC());
+ do_frstor(&ac, ptr, data32);
}
#define XO(X) offsetof(X86XSaveArea, X)
@@ -2971,12 +2969,20 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
#if defined(CONFIG_USER_ONLY)
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
- do_fsave(env, ptr, data32, 0);
+ int size = (14 << data32) + 80;
+ X86Access ac;
+
+ access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, 0);
+ do_fsave(&ac, ptr, data32);
}
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
- do_frstor(env, ptr, data32, 0);
+ int size = (14 << data32) + 80;
+ X86Access ac;
+
+ access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, 0);
+ do_frstor(&ac, ptr, data32);
}
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 07/28] target/i386: Convert do_xsave_{fpu, mxcr, sse} to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (5 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 06/28] target/i386: Convert do_fsave, do_frstor " Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 08/28] target/i386: Convert do_xrstor_{fpu, " Richard Henderson
` (21 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 52 +++++++++++++++++++++---------------
1 file changed, 31 insertions(+), 21 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index e6fa161aa0..643e017bef 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2518,11 +2518,11 @@ void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
#define XO(X) offsetof(X86XSaveArea, X)
-static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_fpu(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
int fpus, fptag, i;
target_ulong addr;
- X86Access ac;
fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
fptag = 0;
@@ -2530,35 +2530,37 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
fptag |= (env->fptags[i] << i);
}
- cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
- cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
- cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
+ access_stw(ac, ptr + XO(legacy.fcw), env->fpuc);
+ access_stw(ac, ptr + XO(legacy.fsw), fpus);
+ access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff);
/* In 32-bit mode this is eip, sel, dp, sel.
In 64-bit mode this is rip, rdp.
But in either case we don't write actual data, just zeros. */
- cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
- cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
+ access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */
+ access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */
addr = ptr + XO(legacy.fpregs);
- access_prepare(&ac, env, addr, 8 * 16, MMU_DATA_STORE, GETPC());
for (i = 0; i < 8; i++) {
floatx80 tmp = ST(i);
- do_fstt(&ac, addr, tmp);
+ do_fstt(ac, addr, tmp);
addr += 16;
}
}
-static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
+
update_mxcsr_from_sse_status(env);
- cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
- cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
+ access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr);
+ access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff);
}
-static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_sse(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
int i, nb_xmm_regs;
target_ulong addr;
@@ -2570,8 +2572,8 @@ static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
addr = ptr + XO(legacy.xmm_regs);
for (i = 0; i < nb_xmm_regs; i++) {
- cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
- cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
+ access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0));
+ access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1));
addr += 16;
}
}
@@ -2618,20 +2620,24 @@ static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
+ X86Access ac;
+
/* The operand must be 16 byte aligned */
if (ptr & 0xf) {
raise_exception_ra(env, EXCP0D_GPF, ra);
}
- do_xsave_fpu(env, ptr, ra);
+ access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
+ MMU_DATA_STORE, ra);
+ do_xsave_fpu(&ac, ptr);
if (env->cr[4] & CR4_OSFXSR_MASK) {
- do_xsave_mxcsr(env, ptr, ra);
+ do_xsave_mxcsr(&ac, ptr);
/* Fast FXSAVE leaves out the XMM registers */
if (!(env->efer & MSR_EFER_FFXSR)
|| (env->hflags & HF_CPL_MASK)
|| !(env->hflags & HF_LMA_MASK)) {
- do_xsave_sse(env, ptr, ra);
+ do_xsave_sse(&ac, ptr);
}
}
}
@@ -2659,6 +2665,7 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
uint64_t inuse, uint64_t opt, uintptr_t ra)
{
uint64_t old_bv, new_bv;
+ X86Access ac;
/* The OS must have enabled XSAVE. */
if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
@@ -2674,15 +2681,18 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
rfbm &= env->xcr0;
opt &= rfbm;
+ access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
+ MMU_DATA_STORE, ra);
+
if (opt & XSTATE_FP_MASK) {
- do_xsave_fpu(env, ptr, ra);
+ do_xsave_fpu(&ac, ptr);
}
if (rfbm & XSTATE_SSE_MASK) {
/* Note that saving MXCSR is not suppressed by XSAVEOPT. */
- do_xsave_mxcsr(env, ptr, ra);
+ do_xsave_mxcsr(&ac, ptr);
}
if (opt & XSTATE_SSE_MASK) {
- do_xsave_sse(env, ptr, ra);
+ do_xsave_sse(&ac, ptr);
}
if (opt & XSTATE_YMM_MASK) {
do_xsave_ymmh(env, ptr + XO(avx_state), ra);
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 08/28] target/i386: Convert do_xrstor_{fpu, mxcr, sse} to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (6 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 07/28] target/i386: Convert do_xsave_{fpu, mxcr, sse} " Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 09/28] tagret/i386: Convert do_fxsave, do_fxrstor " Richard Henderson
` (20 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 46 ++++++++++++++++++++++--------------
1 file changed, 28 insertions(+), 18 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 643e017bef..59f73ad075 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2724,39 +2724,41 @@ void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}
-static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
int i, fpuc, fpus, fptag;
target_ulong addr;
- X86Access ac;
- fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
- fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
- fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
+ fpuc = access_ldw(ac, ptr + XO(legacy.fcw));
+ fpus = access_ldw(ac, ptr + XO(legacy.fsw));
+ fptag = access_ldw(ac, ptr + XO(legacy.ftw));
cpu_set_fpuc(env, fpuc);
cpu_set_fpus(env, fpus);
+
fptag ^= 0xff;
for (i = 0; i < 8; i++) {
env->fptags[i] = ((fptag >> i) & 1);
}
addr = ptr + XO(legacy.fpregs);
- access_prepare(&ac, env, addr, 8 * 16, MMU_DATA_LOAD, GETPC());
for (i = 0; i < 8; i++) {
- floatx80 tmp = do_fldt(&ac, addr);
+ floatx80 tmp = do_fldt(ac, addr);
ST(i) = tmp;
addr += 16;
}
}
-static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr)
{
- cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
+ CPUX86State *env = ac->env;
+ cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr)));
}
-static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_sse(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
int i, nb_xmm_regs;
target_ulong addr;
@@ -2768,8 +2770,8 @@ static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
addr = ptr + XO(legacy.xmm_regs);
for (i = 0; i < nb_xmm_regs; i++) {
- env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
- env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
+ env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr);
+ env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8);
addr += 16;
}
}
@@ -2849,20 +2851,24 @@ static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
+ X86Access ac;
+
/* The operand must be 16 byte aligned */
if (ptr & 0xf) {
raise_exception_ra(env, EXCP0D_GPF, ra);
}
- do_xrstor_fpu(env, ptr, ra);
+ access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
+ MMU_DATA_LOAD, ra);
+ do_xrstor_fpu(&ac, ptr);
if (env->cr[4] & CR4_OSFXSR_MASK) {
- do_xrstor_mxcsr(env, ptr, ra);
+ do_xrstor_mxcsr(&ac, ptr);
/* Fast FXRSTOR leaves out the XMM registers */
if (!(env->efer & MSR_EFER_FFXSR)
|| (env->hflags & HF_CPL_MASK)
|| !(env->hflags & HF_LMA_MASK)) {
- do_xrstor_sse(env, ptr, ra);
+ do_xrstor_sse(&ac, ptr);
}
}
}
@@ -2875,6 +2881,7 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr)
static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra)
{
uint64_t xstate_bv, xcomp_bv, reserve0;
+ X86Access ac;
rfbm &= env->xcr0;
@@ -2913,9 +2920,12 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
raise_exception_ra(env, EXCP0D_GPF, ra);
}
+ access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
+ MMU_DATA_LOAD, ra);
+
if (rfbm & XSTATE_FP_MASK) {
if (xstate_bv & XSTATE_FP_MASK) {
- do_xrstor_fpu(env, ptr, ra);
+ do_xrstor_fpu(&ac, ptr);
} else {
do_fninit(env);
memset(env->fpregs, 0, sizeof(env->fpregs));
@@ -2924,9 +2934,9 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
if (rfbm & XSTATE_SSE_MASK) {
/* Note that the standard form of XRSTOR loads MXCSR from memory
whether or not the XSTATE_BV bit is set. */
- do_xrstor_mxcsr(env, ptr, ra);
+ do_xrstor_mxcsr(&ac, ptr);
if (xstate_bv & XSTATE_SSE_MASK) {
- do_xrstor_sse(env, ptr, ra);
+ do_xrstor_sse(&ac, ptr);
} else {
do_clear_sse(env);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 09/28] tagret/i386: Convert do_fxsave, do_fxrstor to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (7 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 08/28] target/i386: Convert do_xrstor_{fpu, " Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 10/28] target/i386: Convert do_xsave_* " Richard Henderson
` (19 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Move the alignment fault from do_* to helper_*, as it need
not apply to usage from within user-only signal handling.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 84 ++++++++++++++++++++----------------
1 file changed, 48 insertions(+), 36 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 59f73ad075..23e22e4521 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2618,8 +2618,25 @@ static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
cpu_stq_data_ra(env, ptr, env->pkru, ra);
}
-static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_fxsave(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
+
+ do_xsave_fpu(ac, ptr);
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ do_xsave_mxcsr(ac, ptr);
+ /* Fast FXSAVE leaves out the XMM registers */
+ if (!(env->efer & MSR_EFER_FFXSR)
+ || (env->hflags & HF_CPL_MASK)
+ || !(env->hflags & HF_LMA_MASK)) {
+ do_xsave_sse(ac, ptr);
+ }
+ }
+}
+
+void helper_fxsave(CPUX86State *env, target_ulong ptr)
+{
+ uintptr_t ra = GETPC();
X86Access ac;
/* The operand must be 16 byte aligned */
@@ -2629,22 +2646,7 @@ static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra)
access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
MMU_DATA_STORE, ra);
- do_xsave_fpu(&ac, ptr);
-
- if (env->cr[4] & CR4_OSFXSR_MASK) {
- do_xsave_mxcsr(&ac, ptr);
- /* Fast FXSAVE leaves out the XMM registers */
- if (!(env->efer & MSR_EFER_FFXSR)
- || (env->hflags & HF_CPL_MASK)
- || !(env->hflags & HF_LMA_MASK)) {
- do_xsave_sse(&ac, ptr);
- }
- }
-}
-
-void helper_fxsave(CPUX86State *env, target_ulong ptr)
-{
- do_fxsave(env, ptr, GETPC());
+ do_fxsave(&ac, ptr);
}
static uint64_t get_xinuse(CPUX86State *env)
@@ -2849,8 +2851,25 @@ static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
env->pkru = cpu_ldq_data_ra(env, ptr, ra);
}
-static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_fxrstor(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
+
+ do_xrstor_fpu(ac, ptr);
+ if (env->cr[4] & CR4_OSFXSR_MASK) {
+ do_xrstor_mxcsr(ac, ptr);
+ /* Fast FXRSTOR leaves out the XMM registers */
+ if (!(env->efer & MSR_EFER_FFXSR)
+ || (env->hflags & HF_CPL_MASK)
+ || !(env->hflags & HF_LMA_MASK)) {
+ do_xrstor_sse(ac, ptr);
+ }
+ }
+}
+
+void helper_fxrstor(CPUX86State *env, target_ulong ptr)
+{
+ uintptr_t ra = GETPC();
X86Access ac;
/* The operand must be 16 byte aligned */
@@ -2860,22 +2879,7 @@ static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra)
access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
MMU_DATA_LOAD, ra);
- do_xrstor_fpu(&ac, ptr);
-
- if (env->cr[4] & CR4_OSFXSR_MASK) {
- do_xrstor_mxcsr(&ac, ptr);
- /* Fast FXRSTOR leaves out the XMM registers */
- if (!(env->efer & MSR_EFER_FFXSR)
- || (env->hflags & HF_CPL_MASK)
- || !(env->hflags & HF_LMA_MASK)) {
- do_xrstor_sse(&ac, ptr);
- }
- }
-}
-
-void helper_fxrstor(CPUX86State *env, target_ulong ptr)
-{
- do_fxrstor(env, ptr, GETPC());
+ do_fxrstor(&ac, ptr);
}
static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra)
@@ -3007,12 +3011,20 @@ void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
- do_fxsave(env, ptr, 0);
+ X86Access ac;
+
+ access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
+ MMU_DATA_STORE, 0);
+ do_fxsave(&ac, ptr);
}
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
- do_fxrstor(env, ptr, 0);
+ X86Access ac;
+
+ access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
+ MMU_DATA_LOAD, 0);
+ do_fxrstor(&ac, ptr);
}
void cpu_x86_xsave(CPUX86State *env, target_ulong ptr)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 10/28] target/i386: Convert do_xsave_* to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (8 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 09/28] tagret/i386: Convert do_fxsave, do_fxrstor " Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 11/28] target/i386: Convert do_xrstor_* " Richard Henderson
` (18 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
The body of do_xsave is now fully converted.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 47 ++++++++++++++++++++----------------
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 23e22e4521..82a041f4bf 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2578,8 +2578,9 @@ static void do_xsave_sse(X86Access *ac, target_ulong ptr)
}
}
-static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_ymmh(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
int i, nb_xmm_regs;
if (env->hflags & HF_CS64_MASK) {
@@ -2589,33 +2590,36 @@ static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
}
for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
- cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra);
- cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra);
+ access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2));
+ access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3));
}
}
-static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_bndregs(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
int i;
for (i = 0; i < 4; i++, addr += 16) {
- cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
- cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
+ access_stq(ac, addr, env->bnd_regs[i].lb);
+ access_stq(ac, addr + 8, env->bnd_regs[i].ub);
}
}
-static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr)
{
- cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
- env->bndcs_regs.cfgu, ra);
- cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
- env->bndcs_regs.sts, ra);
+ CPUX86State *env = ac->env;
+
+ access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
+ env->bndcs_regs.cfgu);
+ access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
+ env->bndcs_regs.sts);
}
-static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xsave_pkru(X86Access *ac, target_ulong ptr)
{
- cpu_stq_data_ra(env, ptr, env->pkru, ra);
+ access_stq(ac, ptr, ac->env->pkru);
}
static void do_fxsave(X86Access *ac, target_ulong ptr)
@@ -2668,6 +2672,7 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
{
uint64_t old_bv, new_bv;
X86Access ac;
+ unsigned size;
/* The OS must have enabled XSAVE. */
if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
@@ -2683,8 +2688,8 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
rfbm &= env->xcr0;
opt &= rfbm;
- access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
- MMU_DATA_STORE, ra);
+ size = xsave_area_size(opt, false);
+ access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
if (opt & XSTATE_FP_MASK) {
do_xsave_fpu(&ac, ptr);
@@ -2697,22 +2702,22 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
do_xsave_sse(&ac, ptr);
}
if (opt & XSTATE_YMM_MASK) {
- do_xsave_ymmh(env, ptr + XO(avx_state), ra);
+ do_xsave_ymmh(&ac, ptr + XO(avx_state));
}
if (opt & XSTATE_BNDREGS_MASK) {
- do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
+ do_xsave_bndregs(&ac, ptr + XO(bndreg_state));
}
if (opt & XSTATE_BNDCSR_MASK) {
- do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
+ do_xsave_bndcsr(&ac, ptr + XO(bndcsr_state));
}
if (opt & XSTATE_PKRU_MASK) {
- do_xsave_pkru(env, ptr + XO(pkru_state), ra);
+ do_xsave_pkru(&ac, ptr + XO(pkru_state));
}
/* Update the XSTATE_BV field. */
- old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
+ old_bv = access_ldq(&ac, ptr + XO(header.xstate_bv));
new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
- cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
+ access_stq(&ac, ptr + XO(header.xstate_bv), new_bv);
}
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 11/28] target/i386: Convert do_xrstor_* to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (9 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 10/28] target/i386: Convert do_xsave_* " Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 12/28] target/i386: Split out do_xsave_chk Richard Henderson
` (17 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
The body of do_xrstor is now fully converted.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 51 ++++++++++++++++++++++--------------
1 file changed, 31 insertions(+), 20 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 82a041f4bf..883002dc22 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2799,8 +2799,9 @@ static void do_clear_sse(CPUX86State *env)
}
}
-static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
int i, nb_xmm_regs;
if (env->hflags & HF_CS64_MASK) {
@@ -2810,8 +2811,8 @@ static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra)
}
for (i = 0; i < nb_xmm_regs; i++, ptr += 16) {
- env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra);
- env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra);
+ env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr);
+ env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8);
}
}
@@ -2831,29 +2832,32 @@ static void do_clear_ymmh(CPUX86State *env)
}
}
-static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
int i;
for (i = 0; i < 4; i++, addr += 16) {
- env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
- env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
+ env->bnd_regs[i].lb = access_ldq(ac, addr);
+ env->bnd_regs[i].ub = access_ldq(ac, addr + 8);
}
}
-static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr)
{
+ CPUX86State *env = ac->env;
+
/* FIXME: Extend highest implemented bit of linear address. */
env->bndcs_regs.cfgu
- = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
+ = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu));
env->bndcs_regs.sts
- = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
+ = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts));
}
-static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+static void do_xrstor_pkru(X86Access *ac, target_ulong ptr)
{
- env->pkru = cpu_ldq_data_ra(env, ptr, ra);
+ ac->env->pkru = access_ldq(ac, ptr);
}
static void do_fxrstor(X86Access *ac, target_ulong ptr)
@@ -2891,6 +2895,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
{
uint64_t xstate_bv, xcomp_bv, reserve0;
X86Access ac;
+ unsigned size, size_ext;
rfbm &= env->xcr0;
@@ -2904,7 +2909,10 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
raise_exception_ra(env, EXCP0D_GPF, ra);
}
- xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
+ size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
+ access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra);
+
+ xstate_bv = access_ldq(&ac, ptr + XO(header.xstate_bv));
if ((int64_t)xstate_bv < 0) {
/* FIXME: Compact form. */
@@ -2923,14 +2931,17 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
describes only XCOMP_BV, but the description of the standard form
of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
includes the next 64-bit field. */
- xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
- reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
+ xcomp_bv = access_ldq(&ac, ptr + XO(header.xcomp_bv));
+ reserve0 = access_ldq(&ac, ptr + XO(header.reserve0));
if (xcomp_bv || reserve0) {
raise_exception_ra(env, EXCP0D_GPF, ra);
}
- access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
- MMU_DATA_LOAD, ra);
+ size_ext = xsave_area_size(rfbm & xstate_bv, false);
+ if (size < size_ext) {
+ /* TODO: See if existing page probe has covered extra size. */
+ access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra);
+ }
if (rfbm & XSTATE_FP_MASK) {
if (xstate_bv & XSTATE_FP_MASK) {
@@ -2952,14 +2963,14 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
}
if (rfbm & XSTATE_YMM_MASK) {
if (xstate_bv & XSTATE_YMM_MASK) {
- do_xrstor_ymmh(env, ptr + XO(avx_state), ra);
+ do_xrstor_ymmh(&ac, ptr + XO(avx_state));
} else {
do_clear_ymmh(env);
}
}
if (rfbm & XSTATE_BNDREGS_MASK) {
if (xstate_bv & XSTATE_BNDREGS_MASK) {
- do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
+ do_xrstor_bndregs(&ac, ptr + XO(bndreg_state));
env->hflags |= HF_MPX_IU_MASK;
} else {
memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
@@ -2968,7 +2979,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
}
if (rfbm & XSTATE_BNDCSR_MASK) {
if (xstate_bv & XSTATE_BNDCSR_MASK) {
- do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
+ do_xrstor_bndcsr(&ac, ptr + XO(bndcsr_state));
} else {
memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
}
@@ -2977,7 +2988,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
if (rfbm & XSTATE_PKRU_MASK) {
uint64_t old_pkru = env->pkru;
if (xstate_bv & XSTATE_PKRU_MASK) {
- do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
+ do_xrstor_pkru(&ac, ptr + XO(pkru_state));
} else {
env->pkru = 0;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 12/28] target/i386: Split out do_xsave_chk
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (10 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 11/28] target/i386: Convert do_xrstor_* " Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 13/28] target/i386: Add rbfm argument to cpu_x86_{xsave, xrstor} Richard Henderson
` (16 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
This path is not required by user-only, and can in fact
be shared between xsave and xrstor.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 51 +++++++++++++++++++-----------------
1 file changed, 27 insertions(+), 24 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 883002dc22..11c60152de 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2674,16 +2674,6 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
X86Access ac;
unsigned size;
- /* The OS must have enabled XSAVE. */
- if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
- raise_exception_ra(env, EXCP06_ILLOP, ra);
- }
-
- /* The operand must be 64 byte aligned. */
- if (ptr & 63) {
- raise_exception_ra(env, EXCP0D_GPF, ra);
- }
-
/* Never save anything not enabled by XCR0. */
rfbm &= env->xcr0;
opt &= rfbm;
@@ -2720,15 +2710,35 @@ static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
access_stq(&ac, ptr + XO(header.xstate_bv), new_bv);
}
+static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
+{
+ /* The OS must have enabled XSAVE. */
+ if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
+ raise_exception_ra(env, EXCP06_ILLOP, ra);
+ }
+
+ /* The operand must be 64 byte aligned. */
+ if (ptr & 63) {
+ raise_exception_ra(env, EXCP0D_GPF, ra);
+ }
+}
+
void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
- do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
+ uintptr_t ra = GETPC();
+
+ do_xsave_chk(env, ptr, ra);
+ do_xsave(env, ptr, rfbm, get_xinuse(env), -1, ra);
}
void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
- uint64_t inuse = get_xinuse(env);
- do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
+ uintptr_t ra = GETPC();
+ uint64_t inuse;
+
+ do_xsave_chk(env, ptr, ra);
+ inuse = get_xinuse(env);
+ do_xsave(env, ptr, rfbm, inuse, inuse, ra);
}
static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
@@ -2899,16 +2909,6 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
rfbm &= env->xcr0;
- /* The OS must have enabled XSAVE. */
- if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
- raise_exception_ra(env, EXCP06_ILLOP, ra);
- }
-
- /* The operand must be 64 byte aligned. */
- if (ptr & 63) {
- raise_exception_ra(env, EXCP0D_GPF, ra);
- }
-
size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra);
@@ -3003,7 +3003,10 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
- do_xrstor(env, ptr, rfbm, GETPC());
+ uintptr_t ra = GETPC();
+
+ do_xsave_chk(env, ptr, ra);
+ do_xrstor(env, ptr, rfbm, ra);
}
#if defined(CONFIG_USER_ONLY)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 13/28] target/i386: Add rbfm argument to cpu_x86_{xsave, xrstor}
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (11 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 12/28] target/i386: Split out do_xsave_chk Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 14/28] target/i386: Add {hw, sw}_reserved to X86LegacyXSaveArea Richard Henderson
` (15 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
For now, continue to pass all 1's from signal.c.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/cpu.h | 4 ++--
linux-user/i386/signal.c | 4 ++--
target/i386/tcg/fpu_helper.c | 8 ++++----
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 6b05738079..5860acb0c3 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2223,8 +2223,8 @@ void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr);
void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr);
-void cpu_x86_xsave(CPUX86State *s, target_ulong ptr);
-void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr);
+void cpu_x86_xsave(CPUX86State *s, target_ulong ptr, uint64_t rbfm);
+void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr, uint64_t rbfm);
/* cpu.c */
void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index cfe70fc5cf..68659fa1db 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -267,7 +267,7 @@ static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs
/* Zero the header, XSAVE *adds* features to an existing save state. */
memset(fxsave->xfeatures, 0, 64);
- cpu_x86_xsave(env, fxsave_addr);
+ cpu_x86_xsave(env, fxsave_addr, -1);
__put_user(TARGET_FP_XSTATE_MAGIC1, &fxsave->sw_reserved.magic1);
__put_user(extended_size, &fxsave->sw_reserved.extended_size);
__put_user(env->xcr0, &fxsave->sw_reserved.xfeatures);
@@ -568,7 +568,7 @@ static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs
return 1;
}
if (tswapl(*(uint32_t *) &fxsave->xfeatures[xfeatures_size]) == TARGET_FP_XSTATE_MAGIC2) {
- cpu_x86_xrstor(env, fxsave_addr);
+ cpu_x86_xrstor(env, fxsave_addr, -1);
return 0;
}
}
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 11c60152de..dbc1e5d8dd 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -3046,14 +3046,14 @@ void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
do_fxrstor(&ac, ptr);
}
-void cpu_x86_xsave(CPUX86State *env, target_ulong ptr)
+void cpu_x86_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
- do_xsave(env, ptr, -1, get_xinuse(env), -1, 0);
+ do_xsave(env, ptr, rfbm, get_xinuse(env), -1, 0);
}
-void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr)
+void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
- do_xrstor(env, ptr, -1, 0);
+ do_xrstor(env, ptr, rfbm, 0);
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 14/28] target/i386: Add {hw, sw}_reserved to X86LegacyXSaveArea
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (12 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 13/28] target/i386: Add rbfm argument to cpu_x86_{xsave, xrstor} Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 15/28] linux-user/i386: Drop xfeatures_size from sigcontext arithmetic Richard Henderson
` (14 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
This completes the 512 byte structure, allowing the union to
be removed. Assert that the structure layout is as expected.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/cpu.h | 39 +++++++++++++++++++++++++--------------
1 file changed, 25 insertions(+), 14 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 5860acb0c3..5f9c420084 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1419,23 +1419,34 @@ typedef struct {
*/
#define UNASSIGNED_APIC_ID 0xFFFFFFFF
-typedef union X86LegacyXSaveArea {
- struct {
- uint16_t fcw;
- uint16_t fsw;
- uint8_t ftw;
- uint8_t reserved;
- uint16_t fpop;
- uint64_t fpip;
- uint64_t fpdp;
- uint32_t mxcsr;
- uint32_t mxcsr_mask;
- FPReg fpregs[8];
- uint8_t xmm_regs[16][16];
+typedef struct X86LegacyXSaveArea {
+ uint16_t fcw;
+ uint16_t fsw;
+ uint8_t ftw;
+ uint8_t reserved;
+ uint16_t fpop;
+ union {
+ struct {
+ uint64_t fpip;
+ uint64_t fpdp;
+ };
+ struct {
+ uint32_t fip;
+ uint32_t fcs;
+ uint32_t foo;
+ uint32_t fos;
+ };
};
- uint8_t data[512];
+ uint32_t mxcsr;
+ uint32_t mxcsr_mask;
+ FPReg fpregs[8];
+ uint8_t xmm_regs[16][16];
+ uint32_t hw_reserved[12];
+ uint32_t sw_reserved[12];
} X86LegacyXSaveArea;
+QEMU_BUILD_BUG_ON(sizeof(X86LegacyXSaveArea) != 512);
+
typedef struct X86XSaveHeader {
uint64_t xstate_bv;
uint64_t xcomp_bv;
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 15/28] linux-user/i386: Drop xfeatures_size from sigcontext arithmetic
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (13 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 14/28] target/i386: Add {hw, sw}_reserved to X86LegacyXSaveArea Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 16/28] linux-user/i386: Remove xfeatures from target_fpstate_fxsave Richard Henderson
` (13 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
This is subtracting sizeof(target_fpstate_fxsave) in
TARGET_FXSAVE_SIZE, then adding it again via &fxsave->xfeatures.
Perform the same computation using xstate_size alone.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/i386/signal.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 68659fa1db..547c7cc685 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -252,7 +252,6 @@ static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs
__put_user(0, &fxsave->sw_reserved.magic1);
} else {
uint32_t xstate_size = xsave_area_size(env->xcr0, false);
- uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE;
/*
* extended_size is the offset from fpstate_addr to right after the end
@@ -272,7 +271,8 @@ static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs
__put_user(extended_size, &fxsave->sw_reserved.extended_size);
__put_user(env->xcr0, &fxsave->sw_reserved.xfeatures);
__put_user(xstate_size, &fxsave->sw_reserved.xstate_size);
- __put_user(TARGET_FP_XSTATE_MAGIC2, (uint32_t *) &fxsave->xfeatures[xfeatures_size]);
+ __put_user(TARGET_FP_XSTATE_MAGIC2,
+ (uint32_t *)((void *)fxsave + xstate_size));
}
}
@@ -558,7 +558,6 @@ static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs
if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) {
uint32_t extended_size = tswapl(fxsave->sw_reserved.extended_size);
uint32_t xstate_size = tswapl(fxsave->sw_reserved.xstate_size);
- uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE;
/* Linux checks MAGIC2 using xstate_size, not extended_size. */
if (tswapl(fxsave->sw_reserved.magic1) == TARGET_FP_XSTATE_MAGIC1 &&
@@ -567,7 +566,7 @@ static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs
extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) {
return 1;
}
- if (tswapl(*(uint32_t *) &fxsave->xfeatures[xfeatures_size]) == TARGET_FP_XSTATE_MAGIC2) {
+ if (tswapl(*(uint32_t *)((void *)fxsave + xstate_size)) == TARGET_FP_XSTATE_MAGIC2) {
cpu_x86_xrstor(env, fxsave_addr, -1);
return 0;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 16/28] linux-user/i386: Remove xfeatures from target_fpstate_fxsave
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (14 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 15/28] linux-user/i386: Drop xfeatures_size from sigcontext arithmetic Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 17/28] linux-user/i386: Replace target_fpstate_fxsave with X86LegacyXSaveArea Richard Henderson
` (12 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
This is easily computed by advancing past the structure.
At the same time, replace the magic number "64".
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/i386/signal.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 547c7cc685..a4748b743d 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -66,7 +66,6 @@ struct target_fpstate_fxsave {
uint32_t xmm_space[64];
uint32_t hw_reserved[12];
struct target_fpx_sw_bytes sw_reserved;
- uint8_t xfeatures[];
};
#define TARGET_FXSAVE_SIZE sizeof(struct target_fpstate_fxsave)
QEMU_BUILD_BUG_ON(TARGET_FXSAVE_SIZE != 512);
@@ -265,7 +264,7 @@ static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs
assert(!(fxsave_addr & 0x3f));
/* Zero the header, XSAVE *adds* features to an existing save state. */
- memset(fxsave->xfeatures, 0, 64);
+ memset(fxsave + 1, 0, sizeof(X86XSaveHeader));
cpu_x86_xsave(env, fxsave_addr, -1);
__put_user(TARGET_FP_XSTATE_MAGIC1, &fxsave->sw_reserved.magic1);
__put_user(extended_size, &fxsave->sw_reserved.extended_size);
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 17/28] linux-user/i386: Replace target_fpstate_fxsave with X86LegacyXSaveArea
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (15 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 16/28] linux-user/i386: Remove xfeatures from target_fpstate_fxsave Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 18/28] linux-user/i386: Split out struct target_fregs_state Richard Henderson
` (11 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Use the structure definition from target/i386/cpu.h.
The only minor quirk is re-casting the sw_reserved
area to the OS specific struct target_fpx_sw_bytes.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/i386/signal.c | 71 +++++++++++++++-------------------------
1 file changed, 26 insertions(+), 45 deletions(-)
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index a4748b743d..ed98b4d073 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -33,16 +33,6 @@ struct target_fpreg {
uint16_t exponent;
};
-struct target_fpxreg {
- uint16_t significand[4];
- uint16_t exponent;
- uint16_t padding[3];
-};
-
-struct target_xmmreg {
- uint32_t element[4];
-};
-
struct target_fpx_sw_bytes {
uint32_t magic1;
uint32_t extended_size;
@@ -52,25 +42,6 @@ struct target_fpx_sw_bytes {
};
QEMU_BUILD_BUG_ON(sizeof(struct target_fpx_sw_bytes) != 12*4);
-struct target_fpstate_fxsave {
- /* FXSAVE format */
- uint16_t cw;
- uint16_t sw;
- uint16_t twd;
- uint16_t fop;
- uint64_t rip;
- uint64_t rdp;
- uint32_t mxcsr;
- uint32_t mxcsr_mask;
- uint32_t st_space[32];
- uint32_t xmm_space[64];
- uint32_t hw_reserved[12];
- struct target_fpx_sw_bytes sw_reserved;
-};
-#define TARGET_FXSAVE_SIZE sizeof(struct target_fpstate_fxsave)
-QEMU_BUILD_BUG_ON(TARGET_FXSAVE_SIZE != 512);
-QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_fxsave, sw_reserved) != 464);
-
struct target_fpstate_32 {
/* Regular FPU environment */
uint32_t cw;
@@ -83,7 +54,7 @@ struct target_fpstate_32 {
struct target_fpreg st[8];
uint16_t status;
uint16_t magic; /* 0xffff = regular FPU data only */
- struct target_fpstate_fxsave fxsave;
+ X86LegacyXSaveArea fxsave;
};
/*
@@ -96,7 +67,7 @@ QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxsave) & 15);
# define target_fpstate target_fpstate_32
# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxsave)
#else
-# define target_fpstate target_fpstate_fxsave
+# define target_fpstate X86LegacyXSaveArea
# define TARGET_FPSTATE_FXSAVE_OFFSET 0
#endif
@@ -240,15 +211,17 @@ struct rt_sigframe {
* Set up a signal frame.
*/
-static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave,
+static void xsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave,
abi_ulong fxsave_addr)
{
+ struct target_fpx_sw_bytes *sw = (void *)&fxsave->sw_reserved;
+
if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
/* fxsave_addr must be 16 byte aligned for fxsave */
assert(!(fxsave_addr & 0xf));
cpu_x86_fxsave(env, fxsave_addr);
- __put_user(0, &fxsave->sw_reserved.magic1);
+ __put_user(0, &sw->magic1);
} else {
uint32_t xstate_size = xsave_area_size(env->xcr0, false);
@@ -266,10 +239,10 @@ static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxs
/* Zero the header, XSAVE *adds* features to an existing save state. */
memset(fxsave + 1, 0, sizeof(X86XSaveHeader));
cpu_x86_xsave(env, fxsave_addr, -1);
- __put_user(TARGET_FP_XSTATE_MAGIC1, &fxsave->sw_reserved.magic1);
- __put_user(extended_size, &fxsave->sw_reserved.extended_size);
- __put_user(env->xcr0, &fxsave->sw_reserved.xfeatures);
- __put_user(xstate_size, &fxsave->sw_reserved.xstate_size);
+ __put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1);
+ __put_user(extended_size, &sw->extended_size);
+ __put_user(env->xcr0, &sw->xfeatures);
+ __put_user(xstate_size, &sw->xstate_size);
__put_user(TARGET_FP_XSTATE_MAGIC2,
(uint32_t *)((void *)fxsave + xstate_size));
}
@@ -383,9 +356,9 @@ get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t fxsave_offset
}
if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) {
- return (esp - (fxsave_offset + TARGET_FXSAVE_SIZE)) & -8ul;
+ return (esp - (fxsave_offset + sizeof(X86LegacyXSaveArea))) & -8ul;
} else if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
- return ((esp - TARGET_FXSAVE_SIZE) & -16ul) - fxsave_offset;
+ return ((esp - sizeof(X86LegacyXSaveArea)) & -16ul) - fxsave_offset;
} else {
size_t xstate_size =
xsave_area_size(env->xcr0, false) + TARGET_FP_XSTATE_MAGIC2_SIZE;
@@ -551,21 +524,29 @@ give_sigsegv:
force_sigsegv(sig);
}
-static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave,
+static int xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave,
abi_ulong fxsave_addr)
{
+ struct target_fpx_sw_bytes *sw = (void *)&fxsave->sw_reserved;
+
if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) {
- uint32_t extended_size = tswapl(fxsave->sw_reserved.extended_size);
- uint32_t xstate_size = tswapl(fxsave->sw_reserved.xstate_size);
+ uint32_t magic1 = tswapl(sw->magic1);
+ uint32_t extended_size = tswapl(sw->extended_size);
+ uint32_t xstate_size = tswapl(sw->xstate_size);
+ uint32_t minimum_size = (TARGET_FPSTATE_FXSAVE_OFFSET
+ + TARGET_FP_XSTATE_MAGIC2_SIZE
+ + xstate_size);
+ uint32_t magic2;
/* Linux checks MAGIC2 using xstate_size, not extended_size. */
- if (tswapl(fxsave->sw_reserved.magic1) == TARGET_FP_XSTATE_MAGIC1 &&
- extended_size >= TARGET_FPSTATE_FXSAVE_OFFSET + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE) {
+ if (magic1 == TARGET_FP_XSTATE_MAGIC1
+ && extended_size >= minimum_size) {
if (!access_ok(env_cpu(env), VERIFY_READ, fxsave_addr,
extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) {
return 1;
}
- if (tswapl(*(uint32_t *)((void *)fxsave + xstate_size)) == TARGET_FP_XSTATE_MAGIC2) {
+ magic2 = tswapl(*(uint32_t *)((void *)fxsave + xstate_size));
+ if (magic2 == TARGET_FP_XSTATE_MAGIC2) {
cpu_x86_xrstor(env, fxsave_addr, -1);
return 0;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 18/28] linux-user/i386: Split out struct target_fregs_state
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (16 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 17/28] linux-user/i386: Replace target_fpstate_fxsave with X86LegacyXSaveArea Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 19/28] linux-user/i386: Fix -mregparm=3 for signal delivery Richard Henderson
` (10 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/i386/signal.c | 43 +++++++++++++++++++++++-----------------
1 file changed, 25 insertions(+), 18 deletions(-)
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index ed98b4d073..559b63c25b 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -33,6 +33,23 @@ struct target_fpreg {
uint16_t exponent;
};
+/* Legacy x87 fpu state format for FSAVE/FRESTOR. */
+struct target_fregs_state {
+ uint32_t cwd;
+ uint32_t swd;
+ uint32_t twd;
+ uint32_t fip;
+ uint32_t fcs;
+ uint32_t foo;
+ uint32_t fos;
+ struct target_fpreg st[8];
+
+ /* Software status information [not touched by FSAVE]. */
+ uint16_t status;
+ uint16_t magic; /* 0xffff: FPU data only, 0x0000: FXSR FPU data */
+};
+QEMU_BUILD_BUG_ON(sizeof(struct target_fregs_state) != 32 + 80);
+
struct target_fpx_sw_bytes {
uint32_t magic1;
uint32_t extended_size;
@@ -43,29 +60,19 @@ struct target_fpx_sw_bytes {
QEMU_BUILD_BUG_ON(sizeof(struct target_fpx_sw_bytes) != 12*4);
struct target_fpstate_32 {
- /* Regular FPU environment */
- uint32_t cw;
- uint32_t sw;
- uint32_t tag;
- uint32_t ipoff;
- uint32_t cssel;
- uint32_t dataoff;
- uint32_t datasel;
- struct target_fpreg st[8];
- uint16_t status;
- uint16_t magic; /* 0xffff = regular FPU data only */
- X86LegacyXSaveArea fxsave;
+ struct target_fregs_state fpstate;
+ X86LegacyXSaveArea fxstate;
};
/*
* For simplicity, setup_frame aligns struct target_fpstate_32 to
* 16 bytes, so ensure that the FXSAVE area is also aligned.
*/
-QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxsave) & 15);
+QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxstate) & 15);
#ifndef TARGET_X86_64
# define target_fpstate target_fpstate_32
-# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxsave)
+# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxstate)
#else
# define target_fpstate X86LegacyXSaveArea
# define TARGET_FPSTATE_FXSAVE_OFFSET 0
@@ -278,15 +285,15 @@ static void setup_sigcontext(struct target_sigcontext *sc,
__put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);
cpu_x86_fsave(env, fpstate_addr, 1);
- fpstate->status = fpstate->sw;
+ fpstate->fpstate.status = fpstate->fpstate.swd;
if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) {
magic = 0xffff;
} else {
- xsave_sigcontext(env, &fpstate->fxsave,
+ xsave_sigcontext(env, &fpstate->fxstate,
fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET);
magic = 0;
}
- __put_user(magic, &fpstate->magic);
+ __put_user(magic, &fpstate->fpstate.magic);
#else
__put_user(env->regs[R_EDI], &sc->rdi);
__put_user(env->regs[R_ESI], &sc->rsi);
@@ -622,7 +629,7 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
cpu_x86_frstor(env, fpstate_addr, 1);
err = 0;
} else {
- err = xrstor_sigcontext(env, &fpstate->fxsave,
+ err = xrstor_sigcontext(env, &fpstate->fxstate,
fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET);
}
#else
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 19/28] linux-user/i386: Fix -mregparm=3 for signal delivery
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (17 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 18/28] linux-user/i386: Split out struct target_fregs_state Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 7:31 ` Paolo Bonzini
2024-04-09 5:02 ` [PATCH v2 20/28] linux-user/i386: Return boolean success from restore_sigcontext Richard Henderson
` (9 subsequent siblings)
28 siblings, 1 reply; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Since v2.6.19, the kernel has supported -mregparm=3.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/i386/signal.c | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 559b63c25b..f8cc0cff07 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -427,6 +427,11 @@ void setup_frame(int sig, struct target_sigaction *ka,
env->regs[R_ESP] = frame_addr;
env->eip = ka->_sa_handler;
+ /* Make -mregparm=3 work */
+ env->regs[R_EAX] = sig;
+ env->regs[R_EDX] = 0;
+ env->regs[R_ECX] = 0;
+
cpu_x86_load_seg(env, R_DS, __USER_DS);
cpu_x86_load_seg(env, R_ES, __USER_DS);
cpu_x86_load_seg(env, R_SS, __USER_DS);
@@ -448,9 +453,6 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
target_sigset_t *set, CPUX86State *env)
{
abi_ulong frame_addr;
-#ifndef TARGET_X86_64
- abi_ulong addr;
-#endif
struct rt_sigframe *frame;
int i;
@@ -460,14 +462,6 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
goto give_sigsegv;
- /* These fields are only in rt_sigframe on 32 bit */
-#ifndef TARGET_X86_64
- __put_user(sig, &frame->sig);
- addr = frame_addr + offsetof(struct rt_sigframe, info);
- __put_user(addr, &frame->pinfo);
- addr = frame_addr + offsetof(struct rt_sigframe, uc);
- __put_user(addr, &frame->puc);
-#endif
if (ka->sa_flags & TARGET_SA_SIGINFO) {
frame->info = *info;
}
@@ -507,9 +501,13 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
env->eip = ka->_sa_handler;
#ifndef TARGET_X86_64
+ /* Store arguments for both -mregparm=3 and standard. */
env->regs[R_EAX] = sig;
+ __put_user(sig, &frame->sig);
env->regs[R_EDX] = frame_addr + offsetof(struct rt_sigframe, info);
+ __put_user(env->regs[R_EDX], &frame->pinfo);
env->regs[R_ECX] = frame_addr + offsetof(struct rt_sigframe, uc);
+ __put_user(env->regs[R_ECX], &frame->puc);
#else
env->regs[R_EAX] = 0;
env->regs[R_EDI] = sig;
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH v2 19/28] linux-user/i386: Fix -mregparm=3 for signal delivery
2024-04-09 5:02 ` [PATCH v2 19/28] linux-user/i386: Fix -mregparm=3 for signal delivery Richard Henderson
@ 2024-04-09 7:31 ` Paolo Bonzini
0 siblings, 0 replies; 36+ messages in thread
From: Paolo Bonzini @ 2024-04-09 7:31 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 4/9/24 07:02, Richard Henderson wrote:
> Since v2.6.19, the kernel has supported -mregparm=3.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> linux-user/i386/signal.c | 20 +++++++++-----------
> 1 file changed, 9 insertions(+), 11 deletions(-)
>
> diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
> index 559b63c25b..f8cc0cff07 100644
> --- a/linux-user/i386/signal.c
> +++ b/linux-user/i386/signal.c
> @@ -427,6 +427,11 @@ void setup_frame(int sig, struct target_sigaction *ka,
> env->regs[R_ESP] = frame_addr;
> env->eip = ka->_sa_handler;
>
> + /* Make -mregparm=3 work */
> + env->regs[R_EAX] = sig;
> + env->regs[R_EDX] = 0;
> + env->regs[R_ECX] = 0;
Perhaps also move here the
__put_user(sig, &frame->sig);
from above, for consistency with setup_rt_frame?
Paolo
> cpu_x86_load_seg(env, R_DS, __USER_DS);
> cpu_x86_load_seg(env, R_ES, __USER_DS);
> cpu_x86_load_seg(env, R_SS, __USER_DS);
> @@ -448,9 +453,6 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
> target_sigset_t *set, CPUX86State *env)
> {
> abi_ulong frame_addr;
> -#ifndef TARGET_X86_64
> - abi_ulong addr;
> -#endif
> struct rt_sigframe *frame;
> int i;
>
> @@ -460,14 +462,6 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
> if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
> goto give_sigsegv;
>
> - /* These fields are only in rt_sigframe on 32 bit */
> -#ifndef TARGET_X86_64
> - __put_user(sig, &frame->sig);
> - addr = frame_addr + offsetof(struct rt_sigframe, info);
> - __put_user(addr, &frame->pinfo);
> - addr = frame_addr + offsetof(struct rt_sigframe, uc);
> - __put_user(addr, &frame->puc);
> -#endif
> if (ka->sa_flags & TARGET_SA_SIGINFO) {
> frame->info = *info;
> }
> @@ -507,9 +501,13 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
> env->eip = ka->_sa_handler;
>
> #ifndef TARGET_X86_64
> + /* Store arguments for both -mregparm=3 and standard. */
> env->regs[R_EAX] = sig;
> + __put_user(sig, &frame->sig);
> env->regs[R_EDX] = frame_addr + offsetof(struct rt_sigframe, info);
> + __put_user(env->regs[R_EDX], &frame->pinfo);
> env->regs[R_ECX] = frame_addr + offsetof(struct rt_sigframe, uc);
> + __put_user(env->regs[R_ECX], &frame->puc);
> #else
> env->regs[R_EAX] = 0;
> env->regs[R_EDI] = sig;
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 20/28] linux-user/i386: Return boolean success from restore_sigcontext
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (18 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 19/28] linux-user/i386: Fix -mregparm=3 for signal delivery Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 21/28] linux-user/i386: Return boolean success from xrstor_sigcontext Richard Henderson
` (8 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Invert the sense of the return value and use bool.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/i386/signal.c | 51 ++++++++++++++++++++--------------------
1 file changed, 25 insertions(+), 26 deletions(-)
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index f8cc0cff07..1571ff8553 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -563,12 +563,12 @@ static int xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave,
return 0;
}
-static int
-restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
+static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
{
- int err = 1;
abi_ulong fpstate_addr;
unsigned int tmpflags;
+ struct target_fpstate *fpstate;
+ bool ok;
#ifndef TARGET_X86_64
cpu_x86_load_seg(env, R_GS, tswap16(sc->gs));
@@ -616,29 +616,27 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
// regs->orig_eax = -1; /* disable syscall checks */
fpstate_addr = tswapl(sc->fpstate);
- if (fpstate_addr != 0) {
- struct target_fpstate *fpstate;
- if (!lock_user_struct(VERIFY_READ, fpstate, fpstate_addr,
- sizeof(struct target_fpstate))) {
- return err;
- }
-#ifndef TARGET_X86_64
- if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) {
- cpu_x86_frstor(env, fpstate_addr, 1);
- err = 0;
- } else {
- err = xrstor_sigcontext(env, &fpstate->fxstate,
- fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET);
- }
-#else
- err = xrstor_sigcontext(env, fpstate, fpstate_addr);
-#endif
- unlock_user_struct(fpstate, fpstate_addr, 0);
- } else {
- err = 0;
+ if (fpstate_addr == 0) {
+ return true;
}
+ if (!lock_user_struct(VERIFY_READ, fpstate, fpstate_addr,
+ sizeof(struct target_fpstate))) {
+ return false;
+ }
+#ifndef TARGET_X86_64
+ if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) {
+ cpu_x86_frstor(env, fpstate_addr, 1);
+ ok = true;
+ } else {
+ ok = !xrstor_sigcontext(env, &fpstate->fxstate,
+ fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET);
+ }
+#else
+ ok = !xrstor_sigcontext(env, fpstate, fpstate_addr);
+#endif
+ unlock_user_struct(fpstate, fpstate_addr, 0);
- return err;
+ return ok;
}
/* Note: there is no sigreturn on x86_64, there is only rt_sigreturn */
@@ -664,8 +662,9 @@ long do_sigreturn(CPUX86State *env)
set_sigmask(&set);
/* restore registers */
- if (restore_sigcontext(env, &frame->sc))
+ if (!restore_sigcontext(env, &frame->sc)) {
goto badframe;
+ }
unlock_user_struct(frame, frame_addr, 0);
return -QEMU_ESIGRETURN;
@@ -689,7 +688,7 @@ long do_rt_sigreturn(CPUX86State *env)
target_to_host_sigset(&set, &frame->uc.tuc_sigmask);
set_sigmask(&set);
- if (restore_sigcontext(env, &frame->uc.tuc_mcontext)) {
+ if (!restore_sigcontext(env, &frame->uc.tuc_mcontext)) {
goto badframe;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 21/28] linux-user/i386: Return boolean success from xrstor_sigcontext
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (19 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 20/28] linux-user/i386: Return boolean success from restore_sigcontext Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 22/28] linux-user/i386: Fix allocation and alignment of fp state Richard Henderson
` (7 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Invert the sense of the return value and use bool.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/i386/signal.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 1571ff8553..d600a4355b 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -529,8 +529,8 @@ give_sigsegv:
force_sigsegv(sig);
}
-static int xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave,
- abi_ulong fxsave_addr)
+static bool xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave,
+ abi_ulong fxsave_addr)
{
struct target_fpx_sw_bytes *sw = (void *)&fxsave->sw_reserved;
@@ -548,19 +548,19 @@ static int xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave,
&& extended_size >= minimum_size) {
if (!access_ok(env_cpu(env), VERIFY_READ, fxsave_addr,
extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) {
- return 1;
+ return false;
}
magic2 = tswapl(*(uint32_t *)((void *)fxsave + xstate_size));
if (magic2 == TARGET_FP_XSTATE_MAGIC2) {
cpu_x86_xrstor(env, fxsave_addr, -1);
- return 0;
+ return true;
}
}
/* fall through to fxrstor */
}
cpu_x86_fxrstor(env, fxsave_addr);
- return 0;
+ return true;
}
static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
@@ -628,11 +628,11 @@ static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
cpu_x86_frstor(env, fpstate_addr, 1);
ok = true;
} else {
- ok = !xrstor_sigcontext(env, &fpstate->fxstate,
- fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET);
+ ok = xrstor_sigcontext(env, &fpstate->fxstate,
+ fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET);
}
#else
- ok = !xrstor_sigcontext(env, fpstate, fpstate_addr);
+ ok = xrstor_sigcontext(env, fpstate, fpstate_addr);
#endif
unlock_user_struct(fpstate, fpstate_addr, 0);
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 22/28] linux-user/i386: Fix allocation and alignment of fp state
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (20 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 21/28] linux-user/i386: Return boolean success from xrstor_sigcontext Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 23/28] target/i386: Honor xfeatures in xrstor_sigcontext Richard Henderson
` (6 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
For modern cpus, the kernel uses xsave to store all extra
cpu state across the signal handler. For xsave/xrstor to
work, the pointer must be 64 byte aligned. Moreover, the
regular part of the signal frame must be 16 byte aligned.
Attempt to mirror the kernel code as much as possible.
Use enum FPStateKind instead of use_xsave() and use_fxsr().
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1648
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/i386/signal.c | 558 +++++++++++++++++++------------
tests/tcg/x86_64/test-1648.c | 33 ++
tests/tcg/x86_64/Makefile.target | 1 +
3 files changed, 377 insertions(+), 215 deletions(-)
create mode 100644 tests/tcg/x86_64/test-1648.c
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index d600a4355b..d015fe520a 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -64,20 +64,6 @@ struct target_fpstate_32 {
X86LegacyXSaveArea fxstate;
};
-/*
- * For simplicity, setup_frame aligns struct target_fpstate_32 to
- * 16 bytes, so ensure that the FXSAVE area is also aligned.
- */
-QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxstate) & 15);
-
-#ifndef TARGET_X86_64
-# define target_fpstate target_fpstate_32
-# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxstate)
-#else
-# define target_fpstate X86LegacyXSaveArea
-# define TARGET_FPSTATE_FXSAVE_OFFSET 0
-#endif
-
struct target_sigcontext_32 {
uint16_t gs, __gsh;
uint16_t fs, __fsh;
@@ -160,24 +146,16 @@ struct sigframe {
int sig;
struct target_sigcontext sc;
/*
- * The actual fpstate is placed after retcode[] below, to make
- * room for the variable-sized xsave data. The older unused fpstate
- * has to be kept to avoid changing the offset of extramask[], which
+ * The actual fpstate is placed after retcode[] below, to make room
+ * for the variable-sized xsave data. The older unused fpstate has
+ * to be kept to avoid changing the offset of extramask[], which
* is part of the ABI.
*/
- struct target_fpstate fpstate_unused;
+ struct target_fpstate_32 fpstate_unused;
abi_ulong extramask[TARGET_NSIG_WORDS-1];
char retcode[8];
-
- /*
- * This field will be 16-byte aligned in memory. Applying QEMU_ALIGNED
- * to it ensures that the base of the frame has an appropriate alignment
- * too.
- */
- struct target_fpstate fpstate QEMU_ALIGNED(8);
+ /* fp state follows here */
};
-#define TARGET_SIGFRAME_FXSAVE_OFFSET ( \
- offsetof(struct sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET)
struct rt_sigframe {
abi_ulong pretcode;
@@ -187,10 +165,8 @@ struct rt_sigframe {
struct target_siginfo info;
struct target_ucontext uc;
char retcode[8];
- struct target_fpstate fpstate QEMU_ALIGNED(8);
+ /* fp state follows here */
};
-#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \
- offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET)
/*
* Verify that vdso-asmoffset.h constants match.
@@ -208,66 +184,178 @@ struct rt_sigframe {
abi_ulong pretcode;
struct target_ucontext uc;
struct target_siginfo info;
- struct target_fpstate fpstate QEMU_ALIGNED(16);
+ /* fp state follows here */
};
-#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \
- offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET)
#endif
+typedef enum {
+#ifndef TARGET_X86_64
+ FPSTATE_FSAVE,
+#endif
+ FPSTATE_FXSAVE,
+ FPSTATE_XSAVE
+} FPStateKind;
+
+static FPStateKind get_fpstate_kind(CPUX86State *env)
+{
+ if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) {
+ return FPSTATE_XSAVE;
+ }
+#ifdef TARGET_X86_64
+ return FPSTATE_FXSAVE;
+#else
+ if (env->features[FEAT_1_EDX] & CPUID_FXSR) {
+ return FPSTATE_FXSAVE;
+ }
+ return FPSTATE_FSAVE;
+#endif
+}
+
+static unsigned get_fpstate_size(CPUX86State *env, FPStateKind fpkind)
+{
+ /*
+ * Kernel:
+ * fpu__alloc_mathframe
+ * xstate_sigframe_size(current->thread.fpu.fpstate);
+ * size = fpstate->user_size
+ * use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size
+ * where fpstate->user_size is computed at init in
+ * fpu__init_system_xstate_size_legacy and
+ * fpu__init_system_xstate.
+ *
+ * Here we have no place to pre-compute, so inline it all.
+ */
+ switch (fpkind) {
+ case FPSTATE_XSAVE:
+ return (xsave_area_size(env->xcr0, false)
+ + TARGET_FP_XSTATE_MAGIC2_SIZE);
+ case FPSTATE_FXSAVE:
+ return sizeof(X86LegacyXSaveArea);
+#ifndef TARGET_X86_64
+ case FPSTATE_FSAVE:
+ return sizeof(struct target_fregs_state);
+#endif
+ }
+ g_assert_not_reached();
+}
+
+static abi_ptr get_sigframe(struct target_sigaction *ka, CPUX86State *env,
+ unsigned frame_size, FPStateKind fpkind,
+ abi_ptr *fpstate, abi_ptr *fxstate, abi_ptr *fpend)
+{
+ abi_ptr sp;
+ unsigned math_size;
+
+ /* Default to using normal stack */
+ sp = get_sp_from_cpustate(env);
+#ifdef TARGET_X86_64
+ sp -= 128; /* this is the redzone */
+#endif
+
+ /* This is the X/Open sanctioned signal stack switching. */
+ if (ka->sa_flags & TARGET_SA_ONSTACK) {
+ sp = target_sigsp(sp, ka);
+ } else {
+#ifndef TARGET_X86_64
+ /* This is the legacy signal stack switching. */
+ if ((env->segs[R_SS].selector & 0xffff) != __USER_DS
+ && !(ka->sa_flags & TARGET_SA_RESTORER)
+ && ka->sa_restorer) {
+ sp = ka->sa_restorer;
+ }
+#endif
+ }
+
+ math_size = get_fpstate_size(env, fpkind);
+ sp = ROUND_DOWN(sp - math_size, 64);
+ *fpend = sp + math_size;
+ *fxstate = sp;
+#ifndef TARGET_X86_64
+ if (fpkind != FPSTATE_FSAVE) {
+ sp -= sizeof(struct target_fregs_state);
+ }
+#endif
+ *fpstate = sp;
+
+ sp -= frame_size;
+ /*
+ * Align the stack pointer according to the ABI, i.e. so that on
+ * function entry ((sp + sizeof(return_addr)) & 15) == 0.
+ */
+ sp += sizeof(target_ulong);
+ sp = ROUND_DOWN(sp, 16);
+ sp -= sizeof(target_ulong);
+
+ return sp;
+}
+
/*
* Set up a signal frame.
*/
-static void xsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave,
- abi_ulong fxsave_addr)
+static void fxsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxstate,
+ abi_ptr fxstate_addr)
{
- struct target_fpx_sw_bytes *sw = (void *)&fxsave->sw_reserved;
+ struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved;
- if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
- /* fxsave_addr must be 16 byte aligned for fxsave */
- assert(!(fxsave_addr & 0xf));
-
- cpu_x86_fxsave(env, fxsave_addr);
- __put_user(0, &sw->magic1);
- } else {
- uint32_t xstate_size = xsave_area_size(env->xcr0, false);
-
- /*
- * extended_size is the offset from fpstate_addr to right after the end
- * of the extended save states. On 32-bit that includes the legacy
- * FSAVE area.
- */
- uint32_t extended_size = TARGET_FPSTATE_FXSAVE_OFFSET
- + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE;
-
- /* fxsave_addr must be 64 byte aligned for xsave */
- assert(!(fxsave_addr & 0x3f));
-
- /* Zero the header, XSAVE *adds* features to an existing save state. */
- memset(fxsave + 1, 0, sizeof(X86XSaveHeader));
- cpu_x86_xsave(env, fxsave_addr, -1);
- __put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1);
- __put_user(extended_size, &sw->extended_size);
- __put_user(env->xcr0, &sw->xfeatures);
- __put_user(xstate_size, &sw->xstate_size);
- __put_user(TARGET_FP_XSTATE_MAGIC2,
- (uint32_t *)((void *)fxsave + xstate_size));
- }
+ /* fxstate_addr must be 16 byte aligned for fxsave */
+ assert(!(fxstate_addr & 0xf));
+ cpu_x86_fxsave(env, fxstate_addr);
+ __put_user(0, &sw->magic1);
}
-static void setup_sigcontext(struct target_sigcontext *sc,
- struct target_fpstate *fpstate, CPUX86State *env, abi_ulong mask,
- abi_ulong fpstate_addr)
+static void xsave_sigcontext(CPUX86State *env,
+ X86LegacyXSaveArea *fxstate,
+ abi_ptr fpstate_addr,
+ abi_ptr xstate_addr,
+ abi_ptr fpend_addr)
+{
+ struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved;
+ /*
+ * extended_size is the offset from fpstate_addr to right after
+ * the end of the extended save states. On 32-bit that includes
+ * the legacy FSAVE area.
+ */
+ uint32_t extended_size = fpend_addr - fpstate_addr;
+ /* Recover xstate_size by removing magic2. */
+ uint32_t xstate_size = (fpend_addr - xstate_addr
+ - TARGET_FP_XSTATE_MAGIC2_SIZE);
+ /* magic2 goes just after xstate. */
+ uint32_t *magic2 = (void *)fxstate + xstate_size;
+
+ /* xstate_addr must be 64 byte aligned for xsave */
+ assert(!(xstate_addr & 0x3f));
+
+ /* Zero the header, XSAVE *adds* features to an existing save state. */
+ memset(fxstate + 1, 0, sizeof(X86XSaveHeader));
+ cpu_x86_xsave(env, xstate_addr, -1);
+
+ __put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1);
+ __put_user(extended_size, &sw->extended_size);
+ __put_user(env->xcr0, &sw->xfeatures);
+ __put_user(xstate_size, &sw->xstate_size);
+ __put_user(TARGET_FP_XSTATE_MAGIC2, magic2);
+}
+
+static void setup_sigcontext(CPUX86State *env,
+ struct target_sigcontext *sc,
+ abi_ulong mask, FPStateKind fpkind,
+ struct target_fregs_state *fpstate,
+ abi_ptr fpstate_addr,
+ X86LegacyXSaveArea *fxstate,
+ abi_ptr fxstate_addr,
+ abi_ptr fpend_addr)
{
CPUState *cs = env_cpu(env);
+
#ifndef TARGET_X86_64
uint16_t magic;
/* already locked in setup_frame() */
- __put_user(env->segs[R_GS].selector, (unsigned int *)&sc->gs);
- __put_user(env->segs[R_FS].selector, (unsigned int *)&sc->fs);
- __put_user(env->segs[R_ES].selector, (unsigned int *)&sc->es);
- __put_user(env->segs[R_DS].selector, (unsigned int *)&sc->ds);
+ __put_user(env->segs[R_GS].selector, (uint32_t *)&sc->gs);
+ __put_user(env->segs[R_FS].selector, (uint32_t *)&sc->fs);
+ __put_user(env->segs[R_ES].selector, (uint32_t *)&sc->es);
+ __put_user(env->segs[R_DS].selector, (uint32_t *)&sc->ds);
__put_user(env->regs[R_EDI], &sc->edi);
__put_user(env->regs[R_ESI], &sc->esi);
__put_user(env->regs[R_EBP], &sc->ebp);
@@ -279,21 +367,15 @@ static void setup_sigcontext(struct target_sigcontext *sc,
__put_user(cs->exception_index, &sc->trapno);
__put_user(env->error_code, &sc->err);
__put_user(env->eip, &sc->eip);
- __put_user(env->segs[R_CS].selector, (unsigned int *)&sc->cs);
+ __put_user(env->segs[R_CS].selector, (uint32_t *)&sc->cs);
__put_user(env->eflags, &sc->eflags);
__put_user(env->regs[R_ESP], &sc->esp_at_signal);
- __put_user(env->segs[R_SS].selector, (unsigned int *)&sc->ss);
+ __put_user(env->segs[R_SS].selector, (uint32_t *)&sc->ss);
cpu_x86_fsave(env, fpstate_addr, 1);
- fpstate->fpstate.status = fpstate->fpstate.swd;
- if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) {
- magic = 0xffff;
- } else {
- xsave_sigcontext(env, &fpstate->fxstate,
- fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET);
- magic = 0;
- }
- __put_user(magic, &fpstate->fpstate.magic);
+ fpstate->status = fpstate->swd;
+ magic = (fpkind == FPSTATE_FSAVE ? 0 : 0xffff);
+ __put_user(magic, &fpstate->magic);
#else
__put_user(env->regs[R_EDI], &sc->rdi);
__put_user(env->regs[R_ESI], &sc->rsi);
@@ -322,57 +404,25 @@ static void setup_sigcontext(struct target_sigcontext *sc,
__put_user((uint16_t)0, &sc->gs);
__put_user((uint16_t)0, &sc->fs);
__put_user(env->segs[R_SS].selector, &sc->ss);
-
- xsave_sigcontext(env, fpstate, fpstate_addr);
#endif
- __put_user(fpstate_addr, &sc->fpstate);
+ switch (fpkind) {
+ case FPSTATE_XSAVE:
+ xsave_sigcontext(env, fxstate, fpstate_addr, fxstate_addr, fpend_addr);
+ break;
+ case FPSTATE_FXSAVE:
+ fxsave_sigcontext(env, fxstate, fxstate_addr);
+ break;
+ default:
+ break;
+ }
+ __put_user(fpstate_addr, &sc->fpstate);
/* non-iBCS2 extensions.. */
__put_user(mask, &sc->oldmask);
__put_user(env->cr[2], &sc->cr2);
}
-/*
- * Determine which stack to use..
- */
-
-static inline abi_ulong
-get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t fxsave_offset)
-{
- unsigned long esp;
-
- /* Default to using normal stack */
- esp = get_sp_from_cpustate(env);
-#ifdef TARGET_X86_64
- esp -= 128; /* this is the redzone */
-#endif
-
- /* This is the X/Open sanctioned signal stack switching. */
- if (ka->sa_flags & TARGET_SA_ONSTACK) {
- esp = target_sigsp(esp, ka);
- } else {
-#ifndef TARGET_X86_64
- /* This is the legacy signal stack switching. */
- if ((env->segs[R_SS].selector & 0xffff) != __USER_DS &&
- !(ka->sa_flags & TARGET_SA_RESTORER) &&
- ka->sa_restorer) {
- esp = (unsigned long) ka->sa_restorer;
- }
-#endif
- }
-
- if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) {
- return (esp - (fxsave_offset + sizeof(X86LegacyXSaveArea))) & -8ul;
- } else if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
- return ((esp - sizeof(X86LegacyXSaveArea)) & -16ul) - fxsave_offset;
- } else {
- size_t xstate_size =
- xsave_area_size(env->xcr0, false) + TARGET_FP_XSTATE_MAGIC2_SIZE;
- return ((esp - xstate_size) & -64ul) - fxsave_offset;
- }
-}
-
#ifndef TARGET_X86_64
static void install_sigtramp(void *tramp)
{
@@ -394,22 +444,38 @@ static void install_rt_sigtramp(void *tramp)
void setup_frame(int sig, struct target_sigaction *ka,
target_sigset_t *set, CPUX86State *env)
{
- abi_ulong frame_addr;
+ abi_ptr frame_addr, fpstate_addr, fxstate_addr, fpend_addr;
struct sigframe *frame;
- int i;
+ struct target_fregs_state *fpstate;
+ X86LegacyXSaveArea *fxstate;
+ unsigned total_size;
+ FPStateKind fpkind;
- frame_addr = get_sigframe(ka, env, TARGET_SIGFRAME_FXSAVE_OFFSET);
+ fpkind = get_fpstate_kind(env);
+ frame_addr = get_sigframe(ka, env, sizeof(struct sigframe), fpkind,
+ &fpstate_addr, &fxstate_addr, &fpend_addr);
trace_user_setup_frame(env, frame_addr);
- if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
- goto give_sigsegv;
+ total_size = fpend_addr - frame_addr;
+ frame = lock_user(VERIFY_WRITE, frame_addr, total_size, 0);
+ if (!frame) {
+ force_sigsegv(sig);
+ return;
+ }
__put_user(sig, &frame->sig);
- setup_sigcontext(&frame->sc, &frame->fpstate, env, set->sig[0],
- frame_addr + offsetof(struct sigframe, fpstate));
+ fxstate = (void *)frame + (fxstate_addr - frame_addr);
+#ifdef TARGET_X86_64
+ fpstate = NULL;
+#else
+ fpstate = (void *)frame + (fpstate_addr - frame_addr);
+#endif
- for (i = 1; i < TARGET_NSIG_WORDS; i++) {
+ setup_sigcontext(env, &frame->sc, set->sig[0], fpkind,
+ fpstate, fpstate_addr, fxstate, fxstate_addr, fpend_addr);
+
+ for (int i = 1; i < TARGET_NSIG_WORDS; i++) {
__put_user(set->sig[i], &frame->extramask[i - 1]);
}
@@ -422,6 +488,7 @@ void setup_frame(int sig, struct target_sigaction *ka,
install_sigtramp(frame->retcode);
__put_user(default_sigreturn, &frame->pretcode);
}
+ unlock_user(frame, frame_addr, total_size);
/* Set up registers for signal handler */
env->regs[R_ESP] = frame_addr;
@@ -437,13 +504,6 @@ void setup_frame(int sig, struct target_sigaction *ka,
cpu_x86_load_seg(env, R_SS, __USER_DS);
cpu_x86_load_seg(env, R_CS, __USER_CS);
env->eflags &= ~TF_MASK;
-
- unlock_user_struct(frame, frame_addr, 1);
-
- return;
-
-give_sigsegv:
- force_sigsegv(sig);
}
#endif
@@ -452,37 +512,51 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
target_siginfo_t *info,
target_sigset_t *set, CPUX86State *env)
{
- abi_ulong frame_addr;
+ abi_ptr frame_addr, fpstate_addr, fxstate_addr, fpend_addr;
struct rt_sigframe *frame;
- int i;
+ X86LegacyXSaveArea *fxstate;
+ struct target_fregs_state *fpstate;
+ unsigned total_size;
+ FPStateKind fpkind;
- frame_addr = get_sigframe(ka, env, TARGET_RT_SIGFRAME_FXSAVE_OFFSET);
+ fpkind = get_fpstate_kind(env);
+ frame_addr = get_sigframe(ka, env, sizeof(struct rt_sigframe), fpkind,
+ &fpstate_addr, &fxstate_addr, &fpend_addr);
trace_user_setup_rt_frame(env, frame_addr);
- if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0))
+ total_size = fpend_addr - frame_addr;
+ frame = lock_user(VERIFY_WRITE, frame_addr, total_size, 0);
+ if (!frame) {
goto give_sigsegv;
+ }
if (ka->sa_flags & TARGET_SA_SIGINFO) {
frame->info = *info;
}
/* Create the ucontext. */
- if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) {
- __put_user(1, &frame->uc.tuc_flags);
- } else {
- __put_user(0, &frame->uc.tuc_flags);
- }
+ __put_user(fpkind == FPSTATE_XSAVE, &frame->uc.tuc_flags);
__put_user(0, &frame->uc.tuc_link);
target_save_altstack(&frame->uc.tuc_stack, env);
- setup_sigcontext(&frame->uc.tuc_mcontext, &frame->fpstate, env,
- set->sig[0], frame_addr + offsetof(struct rt_sigframe, fpstate));
- for (i = 0; i < TARGET_NSIG_WORDS; i++) {
+ fxstate = (void *)frame + (fxstate_addr - frame_addr);
+#ifdef TARGET_X86_64
+ fpstate = NULL;
+#else
+ fpstate = (void *)frame + (fpstate_addr - frame_addr);
+#endif
+
+ setup_sigcontext(env, &frame->uc.tuc_mcontext, set->sig[0], fpkind,
+ fpstate, fpstate_addr, fxstate, fxstate_addr, fpend_addr);
+
+ for (int i = 0; i < TARGET_NSIG_WORDS; i++) {
__put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]);
}
- /* Set up to return from userspace. If provided, use a stub
- already in userspace. */
+ /*
+ * Set up to return from userspace. If provided, use a stub
+ * already in userspace.
+ */
if (ka->sa_flags & TARGET_SA_RESTORER) {
__put_user(ka->sa_restorer, &frame->pretcode);
} else {
@@ -514,60 +588,113 @@ void setup_rt_frame(int sig, struct target_sigaction *ka,
env->regs[R_ESI] = frame_addr + offsetof(struct rt_sigframe, info);
env->regs[R_EDX] = frame_addr + offsetof(struct rt_sigframe, uc);
#endif
+ unlock_user(frame, frame_addr, total_size);
cpu_x86_load_seg(env, R_DS, __USER_DS);
cpu_x86_load_seg(env, R_ES, __USER_DS);
cpu_x86_load_seg(env, R_CS, __USER_CS);
cpu_x86_load_seg(env, R_SS, __USER_DS);
env->eflags &= ~TF_MASK;
-
- unlock_user_struct(frame, frame_addr, 1);
-
return;
give_sigsegv:
force_sigsegv(sig);
}
-static bool xrstor_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxsave,
- abi_ulong fxsave_addr)
+/*
+ * Restore a signal frame.
+ */
+
+static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
+ X86LegacyXSaveArea *fxstate,
+ abi_ptr fxstate_addr)
{
- struct target_fpx_sw_bytes *sw = (void *)&fxsave->sw_reserved;
+ struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved;
+ uint32_t magic1, magic2;
+ uint32_t extended_size, xstate_size, min_size, max_size;
- if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) {
- uint32_t magic1 = tswapl(sw->magic1);
- uint32_t extended_size = tswapl(sw->extended_size);
- uint32_t xstate_size = tswapl(sw->xstate_size);
- uint32_t minimum_size = (TARGET_FPSTATE_FXSAVE_OFFSET
- + TARGET_FP_XSTATE_MAGIC2_SIZE
- + xstate_size);
- uint32_t magic2;
+ switch (fpkind) {
+ case FPSTATE_XSAVE:
+ magic1 = tswap32(sw->magic1);
+ extended_size = tswap32(sw->extended_size);
+ xstate_size = tswap32(sw->xstate_size);
+ min_size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
+ max_size = xsave_area_size(env->xcr0, false);
- /* Linux checks MAGIC2 using xstate_size, not extended_size. */
- if (magic1 == TARGET_FP_XSTATE_MAGIC1
- && extended_size >= minimum_size) {
- if (!access_ok(env_cpu(env), VERIFY_READ, fxsave_addr,
- extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) {
- return false;
- }
- magic2 = tswapl(*(uint32_t *)((void *)fxsave + xstate_size));
- if (magic2 == TARGET_FP_XSTATE_MAGIC2) {
- cpu_x86_xrstor(env, fxsave_addr, -1);
- return true;
- }
+ /* Check for the first magic field and other error scenarios. */
+ if (magic1 != FP_XSTATE_MAGIC1 ||
+ xstate_size < min_size ||
+ xstate_size > max_size ||
+ xstate_size > extended_size) {
+ break;
}
- /* fall through to fxrstor */
+ if (!access_ok(env_cpu(env), VERIFY_READ, fxstate_addr,
+ xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE)) {
+ return false;
+ }
+ /*
+ * Check for the presence of second magic word at the end of memory
+ * layout. This detects the case where the user just copied the legacy
+ * fpstate layout with out copying the extended state information
+ * in the memory layout.
+ */
+ if (get_user_u32(magic2, fxstate_addr + xstate_size)) {
+ return false;
+ }
+ if (magic2 != FP_XSTATE_MAGIC2) {
+ break;
+ }
+ cpu_x86_xrstor(env, fxstate_addr, -1);
+ return true;
+
+ default:
+ break;
}
- cpu_x86_fxrstor(env, fxsave_addr);
+ cpu_x86_fxrstor(env, fxstate_addr);
return true;
}
+#ifndef TARGET_X86_64
+static bool frstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
+ struct target_fregs_state *fpstate,
+ abi_ptr fpstate_addr,
+ X86LegacyXSaveArea *fxstate,
+ abi_ptr fxstate_addr)
+{
+ switch (fpkind) {
+ case FPSTATE_XSAVE:
+ if (!xrstor_sigcontext(env, fpkind, fxstate, fxstate_addr)) {
+ return false;
+ }
+ break;
+ case FPSTATE_FXSAVE:
+ cpu_x86_fxrstor(env, fxstate_addr);
+ break;
+ case FPSTATE_FSAVE:
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /*
+ * Copy the legacy state because the FP portion of the FX frame has
+ * to be ignored for histerical raisins. The kernel folds the two
+ * states together and then performs a single load; here we perform
+ * the merge within ENV by loading XSTATE/FXSTATE first, then
+ * overriding with the FSTATE afterward.
+ */
+ cpu_x86_frstor(env, fpstate_addr, 1);
+ return true;
+}
+#endif
+
static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
{
- abi_ulong fpstate_addr;
- unsigned int tmpflags;
- struct target_fpstate *fpstate;
+ abi_ptr fpstate_addr;
+ unsigned tmpflags, math_size;
+ FPStateKind fpkind;
+ void *fpstate;
bool ok;
#ifndef TARGET_X86_64
@@ -613,29 +740,33 @@ static bool restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc)
tmpflags = tswapl(sc->eflags);
env->eflags = (env->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
- // regs->orig_eax = -1; /* disable syscall checks */
fpstate_addr = tswapl(sc->fpstate);
if (fpstate_addr == 0) {
return true;
}
- if (!lock_user_struct(VERIFY_READ, fpstate, fpstate_addr,
- sizeof(struct target_fpstate))) {
+
+ fpkind = get_fpstate_kind(env);
+ math_size = get_fpstate_size(env, fpkind);
+#ifndef TARGET_X86_64
+ if (fpkind != FPSTATE_FSAVE) {
+ math_size += sizeof(struct target_fregs_state);
+ }
+#endif
+ fpstate = lock_user(VERIFY_READ, fpstate_addr, math_size, 1);
+ if (!fpstate) {
return false;
}
-#ifndef TARGET_X86_64
- if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) {
- cpu_x86_frstor(env, fpstate_addr, 1);
- ok = true;
- } else {
- ok = xrstor_sigcontext(env, &fpstate->fxstate,
- fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET);
- }
-#else
- ok = xrstor_sigcontext(env, fpstate, fpstate_addr);
-#endif
- unlock_user_struct(fpstate, fpstate_addr, 0);
+#ifdef TARGET_X86_64
+ ok = xrstor_sigcontext(env, fpkind, fpstate, fpstate_addr);
+#else
+ ok = frstor_sigcontext(env, fpkind, fpstate, fpstate_addr,
+ fpstate + sizeof(struct target_fregs_state),
+ fpstate_addr + sizeof(struct target_fregs_state));
+#endif
+
+ unlock_user(fpstate, fpstate_addr, 0);
return ok;
}
@@ -647,30 +778,27 @@ long do_sigreturn(CPUX86State *env)
abi_ulong frame_addr = env->regs[R_ESP] - 8;
target_sigset_t target_set;
sigset_t set;
- int i;
trace_user_do_sigreturn(env, frame_addr);
- if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1))
- goto badframe;
- /* set blocked signals */
- __get_user(target_set.sig[0], &frame->sc.oldmask);
- for(i = 1; i < TARGET_NSIG_WORDS; i++) {
- __get_user(target_set.sig[i], &frame->extramask[i - 1]);
+ if (!lock_user_struct(VERIFY_READ, frame, frame_addr, 1)) {
+ force_sig(TARGET_SIGSEGV);
+ return -QEMU_ESIGRETURN;
}
+ /* Set blocked signals. */
+ __get_user(target_set.sig[0], &frame->sc.oldmask);
+ for (int i = 1; i < TARGET_NSIG_WORDS; i++) {
+ __get_user(target_set.sig[i], &frame->extramask[i - 1]);
+ }
target_to_host_sigset_internal(&set, &target_set);
set_sigmask(&set);
- /* restore registers */
+ /* Restore registers */
if (!restore_sigcontext(env, &frame->sc)) {
- goto badframe;
+ force_sig(TARGET_SIGSEGV);
}
- unlock_user_struct(frame, frame_addr, 0);
- return -QEMU_ESIGRETURN;
-badframe:
unlock_user_struct(frame, frame_addr, 0);
- force_sig(TARGET_SIGSEGV);
return -QEMU_ESIGRETURN;
}
#endif
diff --git a/tests/tcg/x86_64/test-1648.c b/tests/tcg/x86_64/test-1648.c
new file mode 100644
index 0000000000..fd0644a8ce
--- /dev/null
+++ b/tests/tcg/x86_64/test-1648.c
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* See https://gitlab.com/qemu-project/qemu/-/issues/1648 */
+
+#include <signal.h>
+
+__attribute__((noinline))
+void bar(void)
+{
+ /* Success! Continue through sigreturn. */
+}
+
+/*
+ * Because of the change of ABI between foo and bar, the compiler is
+ * required to save XMM6-XMM15. The compiler will use MOVAPS or MOVDQA,
+ * which will trap if the stack frame is not 16 byte aligned.
+ */
+__attribute__((noinline, ms_abi))
+void foo(void)
+{
+ bar();
+}
+
+void sighandler(int num)
+{
+ foo();
+}
+
+int main(void)
+{
+ signal(SIGUSR1, sighandler);
+ raise(SIGUSR1);
+ return 0;
+}
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
index e64aab1b81..5fedf22117 100644
--- a/tests/tcg/x86_64/Makefile.target
+++ b/tests/tcg/x86_64/Makefile.target
@@ -13,6 +13,7 @@ X86_64_TESTS += vsyscall
X86_64_TESTS += noexec
X86_64_TESTS += cmpxchg
X86_64_TESTS += adox
+X86_64_TESTS += test-1648
TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
else
TESTS=$(MULTIARCH_TESTS)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 23/28] target/i386: Honor xfeatures in xrstor_sigcontext
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (21 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 22/28] linux-user/i386: Fix allocation and alignment of fp state Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 7:44 ` Paolo Bonzini
2024-04-09 5:02 ` [PATCH v2 24/28] target/i386: Convert do_xsave to X86Access Richard Henderson
` (5 subsequent siblings)
28 siblings, 1 reply; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/i386/signal.c | 19 ++++++++++++++++++-
1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index d015fe520a..fd09c973d4 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -612,6 +612,7 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved;
uint32_t magic1, magic2;
uint32_t extended_size, xstate_size, min_size, max_size;
+ uint64_t xfeatures;
switch (fpkind) {
case FPSTATE_XSAVE:
@@ -628,10 +629,25 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
xstate_size > extended_size) {
break;
}
+
+ /*
+ * Restore the features indicated in the frame, masked by
+ * those currently enabled. Re-check the frame size.
+ * ??? It is not clear where the kernel does this, but it
+ * is not in check_xstate_in_sigframe, and so (probably)
+ * does not fall back to fxrstor.
+ */
+ xfeatures = tswap64(sw->xfeatures) & env->xcr0;
+ min_size = xsave_area_size(xfeatures, false);
+ if (xstate_size < min_size) {
+ return false;
+ }
+
if (!access_ok(env_cpu(env), VERIFY_READ, fxstate_addr,
xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE)) {
return false;
}
+
/*
* Check for the presence of second magic word at the end of memory
* layout. This detects the case where the user just copied the legacy
@@ -644,7 +660,8 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
if (magic2 != FP_XSTATE_MAGIC2) {
break;
}
- cpu_x86_xrstor(env, fxstate_addr, -1);
+
+ cpu_x86_xrstor(env, fxstate_addr, xfeatures);
return true;
default:
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH v2 23/28] target/i386: Honor xfeatures in xrstor_sigcontext
2024-04-09 5:02 ` [PATCH v2 23/28] target/i386: Honor xfeatures in xrstor_sigcontext Richard Henderson
@ 2024-04-09 7:44 ` Paolo Bonzini
2024-04-09 18:09 ` Richard Henderson
0 siblings, 1 reply; 36+ messages in thread
From: Paolo Bonzini @ 2024-04-09 7:44 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 4/9/24 07:02, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> linux-user/i386/signal.c | 19 ++++++++++++++++++-
> 1 file changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
> index d015fe520a..fd09c973d4 100644
> --- a/linux-user/i386/signal.c
> +++ b/linux-user/i386/signal.c
> @@ -612,6 +612,7 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
> struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved;
> uint32_t magic1, magic2;
> uint32_t extended_size, xstate_size, min_size, max_size;
> + uint64_t xfeatures;
>
> switch (fpkind) {
> case FPSTATE_XSAVE:
> @@ -628,10 +629,25 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
> xstate_size > extended_size) {
> break;
> }
> +
> + /*
> + * Restore the features indicated in the frame, masked by
> + * those currently enabled. Re-check the frame size.
> + * ??? It is not clear where the kernel does this, but it
> + * is not in check_xstate_in_sigframe, and so (probably)
> + * does not fall back to fxrstor.
> + */
I think you're referring to this in __fpu_restore_sig?
if (use_xsave()) {
/*
* Remove all UABI feature bits not set in user_xfeatures
* from the memory xstate header which makes the full
* restore below bring them into init state. This works for
* fx_only mode as well because that has only FP and SSE
* set in user_xfeatures.
*
* Preserve supervisor states!
*/
u64 mask = user_xfeatures | xfeatures_mask_supervisor();
fpregs->xsave.header.xfeatures &= mask;
success = !os_xrstor_safe(fpu->fpstate,
fpu_kernel_cfg.max_features);
It is not masking against the user process's xcr0, but qemu-user's xcr0
is effectively user_xfeatures (it's computed in x86_cpu_reset_hold() and
will never change afterwards since XSETBV is privileged).
Paolo
> + xfeatures = tswap64(sw->xfeatures) & env->xcr0;
> + min_size = xsave_area_size(xfeatures, false);
> + if (xstate_size < min_size) {
> + return false;
> + }
> +
> if (!access_ok(env_cpu(env), VERIFY_READ, fxstate_addr,
> xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE)) {
> return false;
> }
> +
> /*
> * Check for the presence of second magic word at the end of memory
> * layout. This detects the case where the user just copied the legacy
> @@ -644,7 +660,8 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
> if (magic2 != FP_XSTATE_MAGIC2) {
> break;
> }
> - cpu_x86_xrstor(env, fxstate_addr, -1);
> +
> + cpu_x86_xrstor(env, fxstate_addr, xfeatures);
> return true;
>
> default:
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v2 23/28] target/i386: Honor xfeatures in xrstor_sigcontext
2024-04-09 7:44 ` Paolo Bonzini
@ 2024-04-09 18:09 ` Richard Henderson
2024-04-10 0:27 ` Richard Henderson
0 siblings, 1 reply; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 18:09 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 4/8/24 21:44, Paolo Bonzini wrote:
>> + /*
>> + * Restore the features indicated in the frame, masked by
>> + * those currently enabled. Re-check the frame size.
>> + * ??? It is not clear where the kernel does this, but it
>> + * is not in check_xstate_in_sigframe, and so (probably)
>> + * does not fall back to fxrstor.
>> + */
>
> I think you're referring to this in __fpu_restore_sig?
>
> if (use_xsave()) {
> /*
> * Remove all UABI feature bits not set in user_xfeatures
> * from the memory xstate header which makes the full
> * restore below bring them into init state. This works for
> * fx_only mode as well because that has only FP and SSE
> * set in user_xfeatures.
> *
> * Preserve supervisor states!
> */
> u64 mask = user_xfeatures | xfeatures_mask_supervisor();
>
> fpregs->xsave.header.xfeatures &= mask;
> success = !os_xrstor_safe(fpu->fpstate,
> fpu_kernel_cfg.max_features);
>
> It is not masking against the user process's xcr0, but qemu-user's xcr0
> is effectively user_xfeatures (it's computed in x86_cpu_reset_hold() and
> will never change afterwards since XSETBV is privileged).
No, I'm talking about verifying that the xstate_size is large enough.
In check_xstate_in_sigframe,
if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
fx_sw->xstate_size < min_xstate_size ||
Check for the trivially too small case (fxregs + header).
fx_sw->xstate_size > current->thread.fpu.fpstate->user_size ||
Check for the trivially too large case (presumably this is to catch stupidly large values,
assuming garbage).
fx_sw->xstate_size > fx_sw->extended_size)
Check for trivial mismatch between fields.
goto setfx;
But there's a middle case: if xfeatures > 3, then xstate_size must be > min_xstate_size.
I know that the kernel will handle any #GP in xrstor_from_user_sigframe, but there doesn't
seem to be a real check for reading garbage beyond the given size.
r~
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH v2 23/28] target/i386: Honor xfeatures in xrstor_sigcontext
2024-04-09 18:09 ` Richard Henderson
@ 2024-04-10 0:27 ` Richard Henderson
0 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-10 0:27 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 4/9/24 08:09, Richard Henderson wrote:
> On 4/8/24 21:44, Paolo Bonzini wrote:
>>> + /*
>>> + * Restore the features indicated in the frame, masked by
>>> + * those currently enabled. Re-check the frame size.
>>> + * ??? It is not clear where the kernel does this, but it
>>> + * is not in check_xstate_in_sigframe, and so (probably)
>>> + * does not fall back to fxrstor.
>>> + */
>>
>> I think you're referring to this in __fpu_restore_sig?
>>
>> if (use_xsave()) {
>> /*
>> * Remove all UABI feature bits not set in user_xfeatures
>> * from the memory xstate header which makes the full
>> * restore below bring them into init state. This works for
>> * fx_only mode as well because that has only FP and SSE
>> * set in user_xfeatures.
>> *
>> * Preserve supervisor states!
>> */
>> u64 mask = user_xfeatures | xfeatures_mask_supervisor();
>>
>> fpregs->xsave.header.xfeatures &= mask;
>> success = !os_xrstor_safe(fpu->fpstate,
>> fpu_kernel_cfg.max_features);
>>
>> It is not masking against the user process's xcr0, but qemu-user's xcr0
>> is effectively user_xfeatures (it's computed in x86_cpu_reset_hold() and
>> will never change afterwards since XSETBV is privileged).
>
> No, I'm talking about verifying that the xstate_size is large enough.
>
> In check_xstate_in_sigframe,
>
> if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
> fx_sw->xstate_size < min_xstate_size ||
>
> Check for the trivially too small case (fxregs + header).
>
> fx_sw->xstate_size > current->thread.fpu.fpstate->user_size ||
>
> Check for the trivially too large case (presumably this is to catch stupidly large values,
> assuming garbage).
>
> fx_sw->xstate_size > fx_sw->extended_size)
>
> Check for trivial mismatch between fields.
>
> goto setfx;
>
> But there's a middle case: if xfeatures > 3, then xstate_size must be > min_xstate_size.
>
> I know that the kernel will handle any #GP in xrstor_from_user_sigframe, but there doesn't
> seem to be a real check for reading garbage beyond the given size.
Oh, I meant to mention, following the
__fpu_restore_sig:
user_xfeatures = fx_sw_user.xfeatures;
...
if (!ia32_fxstate)
restore_fpregs_from_user(..., user_xfeatures, ...)
restore_fpregs_from_user(..., xrestore, ...)
xrestore &= fpu->fpstate->user_xfeatures;
__restore_fpregs_from_user(..., xrestore, ...)
path.
r~
^ permalink raw reply [flat|nested] 36+ messages in thread
* [PATCH v2 24/28] target/i386: Convert do_xsave to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (22 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 23/28] target/i386: Honor xfeatures in xrstor_sigcontext Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:02 ` [PATCH v2 25/28] target/i386: Convert do_xrstor " Richard Henderson
` (4 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
linux-user/i386/signal.c | 2 +-
target/i386/tcg/fpu_helper.c | 72 +++++++++++++++++++++---------------
2 files changed, 43 insertions(+), 31 deletions(-)
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index fd09c973d4..ba17d27219 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -328,7 +328,7 @@ static void xsave_sigcontext(CPUX86State *env,
/* Zero the header, XSAVE *adds* features to an existing save state. */
memset(fxstate + 1, 0, sizeof(X86XSaveHeader));
- cpu_x86_xsave(env, xstate_addr, -1);
+ cpu_x86_xsave(env, xstate_addr, env->xcr0);
__put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1);
__put_user(extended_size, &sw->extended_size);
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index dbc1e5d8dd..d4dd09dc95 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2667,47 +2667,38 @@ static uint64_t get_xinuse(CPUX86State *env)
return inuse;
}
-static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
- uint64_t inuse, uint64_t opt, uintptr_t ra)
+static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm,
+ uint64_t inuse, uint64_t opt)
{
uint64_t old_bv, new_bv;
- X86Access ac;
- unsigned size;
-
- /* Never save anything not enabled by XCR0. */
- rfbm &= env->xcr0;
- opt &= rfbm;
-
- size = xsave_area_size(opt, false);
- access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
if (opt & XSTATE_FP_MASK) {
- do_xsave_fpu(&ac, ptr);
+ do_xsave_fpu(ac, ptr);
}
if (rfbm & XSTATE_SSE_MASK) {
/* Note that saving MXCSR is not suppressed by XSAVEOPT. */
- do_xsave_mxcsr(&ac, ptr);
+ do_xsave_mxcsr(ac, ptr);
}
if (opt & XSTATE_SSE_MASK) {
- do_xsave_sse(&ac, ptr);
+ do_xsave_sse(ac, ptr);
}
if (opt & XSTATE_YMM_MASK) {
- do_xsave_ymmh(&ac, ptr + XO(avx_state));
+ do_xsave_ymmh(ac, ptr + XO(avx_state));
}
if (opt & XSTATE_BNDREGS_MASK) {
- do_xsave_bndregs(&ac, ptr + XO(bndreg_state));
+ do_xsave_bndregs(ac, ptr + XO(bndreg_state));
}
if (opt & XSTATE_BNDCSR_MASK) {
- do_xsave_bndcsr(&ac, ptr + XO(bndcsr_state));
+ do_xsave_bndcsr(ac, ptr + XO(bndcsr_state));
}
if (opt & XSTATE_PKRU_MASK) {
- do_xsave_pkru(&ac, ptr + XO(pkru_state));
+ do_xsave_pkru(ac, ptr + XO(pkru_state));
}
/* Update the XSTATE_BV field. */
- old_bv = access_ldq(&ac, ptr + XO(header.xstate_bv));
+ old_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
- access_stq(&ac, ptr + XO(header.xstate_bv), new_bv);
+ access_stq(ac, ptr + XO(header.xstate_bv), new_bv);
}
static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
@@ -2723,22 +2714,32 @@ static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra)
}
}
-void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
+ uint64_t inuse, uint64_t opt, uintptr_t ra)
{
- uintptr_t ra = GETPC();
+ X86Access ac;
+ unsigned size;
do_xsave_chk(env, ptr, ra);
- do_xsave(env, ptr, rfbm, get_xinuse(env), -1, ra);
+
+ /* Never save anything not enabled by XCR0. */
+ rfbm &= env->xcr0;
+ opt &= rfbm;
+ size = xsave_area_size(opt, false);
+
+ access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra);
+ do_xsave_access(&ac, ptr, rfbm, inuse, opt);
+}
+
+void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+{
+ do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC());
}
void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
- uintptr_t ra = GETPC();
- uint64_t inuse;
-
- do_xsave_chk(env, ptr, ra);
- inuse = get_xinuse(env);
- do_xsave(env, ptr, rfbm, inuse, inuse, ra);
+ uint64_t inuse = get_xinuse(env);
+ do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}
static void do_xrstor_fpu(X86Access *ac, target_ulong ptr)
@@ -3048,7 +3049,18 @@ void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
void cpu_x86_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
- do_xsave(env, ptr, rfbm, get_xinuse(env), -1, 0);
+ X86Access ac;
+ unsigned size;
+
+ /*
+ * Since this is only called from user-level signal handling,
+ * we should have done the job correctly there.
+ */
+ assert((rfbm & ~env->xcr0) == 0);
+ size = xsave_area_size(rfbm, false);
+
+ access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, 0);
+ do_xsave_access(&ac, ptr, rfbm, get_xinuse(env), rfbm);
}
void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 25/28] target/i386: Convert do_xrstor to X86Access
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (23 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 24/28] target/i386: Convert do_xsave to X86Access Richard Henderson
@ 2024-04-09 5:02 ` Richard Henderson
2024-04-09 5:03 ` [PATCH v2 26/28] target/i386: Pass host pointer and size to cpu_x86_{fsave, frstor} Richard Henderson
` (3 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:02 UTC (permalink / raw)
To: qemu-devel
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/tcg/fpu_helper.c | 106 +++++++++++++++++++++--------------
1 file changed, 64 insertions(+), 42 deletions(-)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index d4dd09dc95..909da05f91 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -2902,51 +2902,38 @@ void helper_fxrstor(CPUX86State *env, target_ulong ptr)
do_fxrstor(&ac, ptr);
}
-static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra)
+static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv,
+ target_ulong ptr)
{
uint64_t xstate_bv, xcomp_bv, reserve0;
- X86Access ac;
- unsigned size, size_ext;
- rfbm &= env->xcr0;
+ xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv));
+ xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv));
+ reserve0 = access_ldq(ac, ptr + XO(header.reserve0));
+ *pxsbv = xstate_bv;
- size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
- access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra);
-
- xstate_bv = access_ldq(&ac, ptr + XO(header.xstate_bv));
-
- if ((int64_t)xstate_bv < 0) {
- /* FIXME: Compact form. */
- raise_exception_ra(env, EXCP0D_GPF, ra);
+ /*
+ * XCOMP_BV bit 63 indicates compact form, which we do not support,
+ * and thus must raise #GP. That leaves us in standard form.
+ * In standard form, bytes 23:8 must be zero -- which is both
+ * XCOMP_BV and the following 64-bit field.
+ */
+ if (xcomp_bv || reserve0) {
+ return false;
}
- /* Standard form. */
-
/* The XSTATE_BV field must not set bits not present in XCR0. */
- if (xstate_bv & ~env->xcr0) {
- raise_exception_ra(env, EXCP0D_GPF, ra);
- }
+ return (xstate_bv & ~ac->env->xcr0) == 0;
+}
- /* The XCOMP_BV field must be zero. Note that, as of the April 2016
- revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
- describes only XCOMP_BV, but the description of the standard form
- of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
- includes the next 64-bit field. */
- xcomp_bv = access_ldq(&ac, ptr + XO(header.xcomp_bv));
- reserve0 = access_ldq(&ac, ptr + XO(header.reserve0));
- if (xcomp_bv || reserve0) {
- raise_exception_ra(env, EXCP0D_GPF, ra);
- }
-
- size_ext = xsave_area_size(rfbm & xstate_bv, false);
- if (size < size_ext) {
- /* TODO: See if existing page probe has covered extra size. */
- access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra);
- }
+static void do_xrstor(X86Access *ac, target_ulong ptr,
+ uint64_t rfbm, uint64_t xstate_bv)
+{
+ CPUX86State *env = ac->env;
if (rfbm & XSTATE_FP_MASK) {
if (xstate_bv & XSTATE_FP_MASK) {
- do_xrstor_fpu(&ac, ptr);
+ do_xrstor_fpu(ac, ptr);
} else {
do_fninit(env);
memset(env->fpregs, 0, sizeof(env->fpregs));
@@ -2955,23 +2942,23 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
if (rfbm & XSTATE_SSE_MASK) {
/* Note that the standard form of XRSTOR loads MXCSR from memory
whether or not the XSTATE_BV bit is set. */
- do_xrstor_mxcsr(&ac, ptr);
+ do_xrstor_mxcsr(ac, ptr);
if (xstate_bv & XSTATE_SSE_MASK) {
- do_xrstor_sse(&ac, ptr);
+ do_xrstor_sse(ac, ptr);
} else {
do_clear_sse(env);
}
}
if (rfbm & XSTATE_YMM_MASK) {
if (xstate_bv & XSTATE_YMM_MASK) {
- do_xrstor_ymmh(&ac, ptr + XO(avx_state));
+ do_xrstor_ymmh(ac, ptr + XO(avx_state));
} else {
do_clear_ymmh(env);
}
}
if (rfbm & XSTATE_BNDREGS_MASK) {
if (xstate_bv & XSTATE_BNDREGS_MASK) {
- do_xrstor_bndregs(&ac, ptr + XO(bndreg_state));
+ do_xrstor_bndregs(ac, ptr + XO(bndreg_state));
env->hflags |= HF_MPX_IU_MASK;
} else {
memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
@@ -2980,7 +2967,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
}
if (rfbm & XSTATE_BNDCSR_MASK) {
if (xstate_bv & XSTATE_BNDCSR_MASK) {
- do_xrstor_bndcsr(&ac, ptr + XO(bndcsr_state));
+ do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state));
} else {
memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
}
@@ -2989,7 +2976,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
if (rfbm & XSTATE_PKRU_MASK) {
uint64_t old_pkru = env->pkru;
if (xstate_bv & XSTATE_PKRU_MASK) {
- do_xrstor_pkru(&ac, ptr + XO(pkru_state));
+ do_xrstor_pkru(ac, ptr + XO(pkru_state));
} else {
env->pkru = 0;
}
@@ -3005,9 +2992,27 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr
void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
uintptr_t ra = GETPC();
+ X86Access ac;
+ uint64_t xstate_bv;
+ unsigned size, size_ext;
do_xsave_chk(env, ptr, ra);
- do_xrstor(env, ptr, rfbm, ra);
+
+ /* Begin with just the minimum size to validate the header. */
+ size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader);
+ access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra);
+ if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) {
+ raise_exception_ra(env, EXCP0D_GPF, ra);
+ }
+
+ rfbm &= env->xcr0;
+ size_ext = xsave_area_size(rfbm & xstate_bv, false);
+ if (size < size_ext) {
+ /* TODO: See if existing page probe has covered extra size. */
+ access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra);
+ }
+
+ do_xrstor(&ac, ptr, rfbm, xstate_bv);
}
#if defined(CONFIG_USER_ONLY)
@@ -3065,7 +3070,24 @@ void cpu_x86_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
- do_xrstor(env, ptr, rfbm, 0);
+ X86Access ac;
+ uint64_t xstate_bv;
+ unsigned size;
+
+ /*
+ * Since this is only called from user-level signal handling,
+ * we should have done the job correctly there.
+ */
+ assert((rfbm & ~env->xcr0) == 0);
+ size = xsave_area_size(rfbm, false);
+ access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, 0);
+
+ if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) {
+ /* TODO: Report failure to caller. */
+ xstate_bv &= env->xcr0;
+ }
+
+ do_xrstor(&ac, ptr, rfbm, xstate_bv);
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 26/28] target/i386: Pass host pointer and size to cpu_x86_{fsave, frstor}
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (24 preceding siblings ...)
2024-04-09 5:02 ` [PATCH v2 25/28] target/i386: Convert do_xrstor " Richard Henderson
@ 2024-04-09 5:03 ` Richard Henderson
2024-04-09 5:03 ` [PATCH v2 27/28] target/i386: Pass host pointer and size to cpu_x86_{fxsave, fxrstor} Richard Henderson
` (2 subsequent siblings)
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:03 UTC (permalink / raw)
To: qemu-devel
We have already validated the memory region in the course of
validating the signal frame. No need to do it again within
the helper function.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/cpu.h | 10 ++++++----
linux-user/i386/signal.c | 4 ++--
target/i386/tcg/fpu_helper.c | 26 ++++++++++++++++----------
3 files changed, 24 insertions(+), 16 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 5f9c420084..8eb97fdd7a 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2227,11 +2227,13 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector,
/* used for debug or cpu save/restore */
/* cpu-exec.c */
-/* the following helpers are only usable in user mode simulation as
- they can trigger unexpected exceptions */
+/*
+ * The following helpers are only usable in user mode simulation.
+ * The host pointers should come from lock_user().
+ */
void cpu_x86_load_seg(CPUX86State *s, X86Seg seg_reg, int selector);
-void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32);
-void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
+void cpu_x86_fsave(CPUX86State *s, void *host, size_t len);
+void cpu_x86_frstor(CPUX86State *s, void *host, size_t len);
void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr);
void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr);
void cpu_x86_xsave(CPUX86State *s, target_ulong ptr, uint64_t rbfm);
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index ba17d27219..7178440d67 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -372,7 +372,7 @@ static void setup_sigcontext(CPUX86State *env,
__put_user(env->regs[R_ESP], &sc->esp_at_signal);
__put_user(env->segs[R_SS].selector, (uint32_t *)&sc->ss);
- cpu_x86_fsave(env, fpstate_addr, 1);
+ cpu_x86_fsave(env, fpstate, sizeof(*fpstate));
fpstate->status = fpstate->swd;
magic = (fpkind == FPSTATE_FSAVE ? 0 : 0xffff);
__put_user(magic, &fpstate->magic);
@@ -701,7 +701,7 @@ static bool frstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
* the merge within ENV by loading XSTATE/FXSTATE first, then
* overriding with the FSTATE afterward.
*/
- cpu_x86_frstor(env, fpstate_addr, 1);
+ cpu_x86_frstor(env, fpstate, sizeof(*fpstate));
return true;
}
#endif
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 909da05f91..0a91757690 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -3016,22 +3016,28 @@ void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
}
#if defined(CONFIG_USER_ONLY)
-void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_fsave(CPUX86State *env, void *host, size_t len)
{
- int size = (14 << data32) + 80;
- X86Access ac;
+ X86Access ac = {
+ .haddr1 = host,
+ .size = 4 * 7 + 8 * 10,
+ .env = env,
+ };
- access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, 0);
- do_fsave(&ac, ptr, data32);
+ assert(ac.size <= len);
+ do_fsave(&ac, 0, true);
}
-void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
+void cpu_x86_frstor(CPUX86State *env, void *host, size_t len)
{
- int size = (14 << data32) + 80;
- X86Access ac;
+ X86Access ac = {
+ .haddr1 = host,
+ .size = 4 * 7 + 8 * 10,
+ .env = env,
+ };
- access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, 0);
- do_frstor(&ac, ptr, data32);
+ assert(ac.size <= len);
+ do_frstor(&ac, 0, true);
}
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 27/28] target/i386: Pass host pointer and size to cpu_x86_{fxsave, fxrstor}
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (25 preceding siblings ...)
2024-04-09 5:03 ` [PATCH v2 26/28] target/i386: Pass host pointer and size to cpu_x86_{fsave, frstor} Richard Henderson
@ 2024-04-09 5:03 ` Richard Henderson
2024-04-09 5:03 ` [PATCH v2 28/28] target/i386: Pass host pointer and size to cpu_x86_{xsave, xrstor} Richard Henderson
2024-04-09 7:52 ` [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Paolo Bonzini
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:03 UTC (permalink / raw)
To: qemu-devel
We have already validated the memory region in the course of
validating the signal frame. No need to do it again within
the helper function.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/cpu.h | 4 ++--
linux-user/i386/signal.c | 13 +++++--------
target/i386/tcg/fpu_helper.c | 26 ++++++++++++++++----------
3 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8eb97fdd7a..35a8bf831f 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2234,8 +2234,8 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector,
void cpu_x86_load_seg(CPUX86State *s, X86Seg seg_reg, int selector);
void cpu_x86_fsave(CPUX86State *s, void *host, size_t len);
void cpu_x86_frstor(CPUX86State *s, void *host, size_t len);
-void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr);
-void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr);
+void cpu_x86_fxsave(CPUX86State *s, void *host, size_t len);
+void cpu_x86_fxrstor(CPUX86State *s, void *host, size_t len);
void cpu_x86_xsave(CPUX86State *s, target_ulong ptr, uint64_t rbfm);
void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr, uint64_t rbfm);
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index 7178440d67..b823dee17f 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -293,14 +293,11 @@ static abi_ptr get_sigframe(struct target_sigaction *ka, CPUX86State *env,
* Set up a signal frame.
*/
-static void fxsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxstate,
- abi_ptr fxstate_addr)
+static void fxsave_sigcontext(CPUX86State *env, X86LegacyXSaveArea *fxstate)
{
struct target_fpx_sw_bytes *sw = (void *)&fxstate->sw_reserved;
- /* fxstate_addr must be 16 byte aligned for fxsave */
- assert(!(fxstate_addr & 0xf));
- cpu_x86_fxsave(env, fxstate_addr);
+ cpu_x86_fxsave(env, fxstate, sizeof(*fxstate));
__put_user(0, &sw->magic1);
}
@@ -411,7 +408,7 @@ static void setup_sigcontext(CPUX86State *env,
xsave_sigcontext(env, fxstate, fpstate_addr, fxstate_addr, fpend_addr);
break;
case FPSTATE_FXSAVE:
- fxsave_sigcontext(env, fxstate, fxstate_addr);
+ fxsave_sigcontext(env, fxstate);
break;
default:
break;
@@ -668,7 +665,7 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
break;
}
- cpu_x86_fxrstor(env, fxstate_addr);
+ cpu_x86_fxrstor(env, fxstate, sizeof(*fxstate));
return true;
}
@@ -686,7 +683,7 @@ static bool frstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
}
break;
case FPSTATE_FXSAVE:
- cpu_x86_fxrstor(env, fxstate_addr);
+ cpu_x86_fxrstor(env, fxstate, sizeof(*fxstate));
break;
case FPSTATE_FSAVE:
break;
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 0a91757690..1c2121c559 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -3040,22 +3040,28 @@ void cpu_x86_frstor(CPUX86State *env, void *host, size_t len)
do_frstor(&ac, 0, true);
}
-void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
+void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len)
{
- X86Access ac;
+ X86Access ac = {
+ .haddr1 = host,
+ .size = sizeof(X86LegacyXSaveArea),
+ .env = env,
+ };
- access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
- MMU_DATA_STORE, 0);
- do_fxsave(&ac, ptr);
+ assert(ac.size <= len);
+ do_fxsave(&ac, 0);
}
-void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
+void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len)
{
- X86Access ac;
+ X86Access ac = {
+ .haddr1 = host,
+ .size = sizeof(X86LegacyXSaveArea),
+ .env = env,
+ };
- access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea),
- MMU_DATA_LOAD, 0);
- do_fxrstor(&ac, ptr);
+ assert(ac.size <= len);
+ do_fxrstor(&ac, 0);
}
void cpu_x86_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* [PATCH v2 28/28] target/i386: Pass host pointer and size to cpu_x86_{xsave, xrstor}
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (26 preceding siblings ...)
2024-04-09 5:03 ` [PATCH v2 27/28] target/i386: Pass host pointer and size to cpu_x86_{fxsave, fxrstor} Richard Henderson
@ 2024-04-09 5:03 ` Richard Henderson
2024-04-09 7:52 ` [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Paolo Bonzini
28 siblings, 0 replies; 36+ messages in thread
From: Richard Henderson @ 2024-04-09 5:03 UTC (permalink / raw)
To: qemu-devel
We have already validated the memory region in the course of
validating the signal frame. No need to do it again within
the helper function.
In addition, return failure when the header contains invalid
xstate_bv. The kernel handles this via exception handling
within XSTATE_OP within xrstor_from_user_sigframe.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/cpu.h | 4 ++--
linux-user/i386/signal.c | 20 ++++++++++++--------
target/i386/tcg/fpu_helper.c | 36 +++++++++++++++++++-----------------
3 files changed, 33 insertions(+), 27 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 35a8bf831f..21d905d669 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -2236,8 +2236,8 @@ void cpu_x86_fsave(CPUX86State *s, void *host, size_t len);
void cpu_x86_frstor(CPUX86State *s, void *host, size_t len);
void cpu_x86_fxsave(CPUX86State *s, void *host, size_t len);
void cpu_x86_fxrstor(CPUX86State *s, void *host, size_t len);
-void cpu_x86_xsave(CPUX86State *s, target_ulong ptr, uint64_t rbfm);
-void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr, uint64_t rbfm);
+void cpu_x86_xsave(CPUX86State *s, void *host, size_t len, uint64_t rbfm);
+bool cpu_x86_xrstor(CPUX86State *s, void *host, size_t len, uint64_t rbfm);
/* cpu.c */
void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c
index b823dee17f..d8803e7df3 100644
--- a/linux-user/i386/signal.c
+++ b/linux-user/i386/signal.c
@@ -325,7 +325,7 @@ static void xsave_sigcontext(CPUX86State *env,
/* Zero the header, XSAVE *adds* features to an existing save state. */
memset(fxstate + 1, 0, sizeof(X86XSaveHeader));
- cpu_x86_xsave(env, xstate_addr, env->xcr0);
+ cpu_x86_xsave(env, fxstate, fpend_addr - xstate_addr, env->xcr0);
__put_user(TARGET_FP_XSTATE_MAGIC1, &sw->magic1);
__put_user(extended_size, &sw->extended_size);
@@ -610,6 +610,8 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
uint32_t magic1, magic2;
uint32_t extended_size, xstate_size, min_size, max_size;
uint64_t xfeatures;
+ void *xstate;
+ bool ok;
switch (fpkind) {
case FPSTATE_XSAVE:
@@ -640,8 +642,10 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
return false;
}
- if (!access_ok(env_cpu(env), VERIFY_READ, fxstate_addr,
- xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE)) {
+ /* Re-lock the entire xstate area, with the extensions and magic. */
+ xstate = lock_user(VERIFY_READ, fxstate_addr,
+ xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE, 1);
+ if (!xstate) {
return false;
}
@@ -651,15 +655,15 @@ static bool xrstor_sigcontext(CPUX86State *env, FPStateKind fpkind,
* fpstate layout with out copying the extended state information
* in the memory layout.
*/
- if (get_user_u32(magic2, fxstate_addr + xstate_size)) {
- return false;
- }
+ magic2 = tswap32(*(uint32_t *)(xstate + xstate_size));
if (magic2 != FP_XSTATE_MAGIC2) {
+ unlock_user(xstate, fxstate_addr, 0);
break;
}
- cpu_x86_xrstor(env, fxstate_addr, xfeatures);
- return true;
+ ok = cpu_x86_xrstor(env, xstate, xstate_size, xfeatures);
+ unlock_user(xstate, fxstate_addr, 0);
+ return ok;
default:
break;
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c
index 1c2121c559..4ec0f3786f 100644
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -3064,42 +3064,44 @@ void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len)
do_fxrstor(&ac, 0);
}
-void cpu_x86_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
- X86Access ac;
- unsigned size;
+ X86Access ac = {
+ .haddr1 = host,
+ .env = env,
+ };
/*
* Since this is only called from user-level signal handling,
* we should have done the job correctly there.
*/
assert((rfbm & ~env->xcr0) == 0);
- size = xsave_area_size(rfbm, false);
-
- access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, 0);
- do_xsave_access(&ac, ptr, rfbm, get_xinuse(env), rfbm);
+ ac.size = xsave_area_size(rfbm, false);
+ assert(ac.size <= len);
+ do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm);
}
-void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
+bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm)
{
- X86Access ac;
+ X86Access ac = {
+ .haddr1 = host,
+ .env = env,
+ };
uint64_t xstate_bv;
- unsigned size;
/*
* Since this is only called from user-level signal handling,
* we should have done the job correctly there.
*/
assert((rfbm & ~env->xcr0) == 0);
- size = xsave_area_size(rfbm, false);
- access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, 0);
+ ac.size = xsave_area_size(rfbm, false);
+ assert(ac.size <= len);
- if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) {
- /* TODO: Report failure to caller. */
- xstate_bv &= env->xcr0;
+ if (!valid_xrstor_header(&ac, &xstate_bv, 0)) {
+ return false;
}
-
- do_xrstor(&ac, ptr, rfbm, xstate_bv);
+ do_xrstor(&ac, 0, rfbm, xstate_bv);
+ return true;
}
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 36+ messages in thread
* Re: [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame
2024-04-09 5:02 [PATCH for-9.1 v2 00/28] linux-user/i386: Properly align signal frame Richard Henderson
` (27 preceding siblings ...)
2024-04-09 5:03 ` [PATCH v2 28/28] target/i386: Pass host pointer and size to cpu_x86_{xsave, xrstor} Richard Henderson
@ 2024-04-09 7:52 ` Paolo Bonzini
28 siblings, 0 replies; 36+ messages in thread
From: Paolo Bonzini @ 2024-04-09 7:52 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
On 4/9/24 07:02, Richard Henderson wrote:
> v1: https://lore.kernel.org/qemu-devel/20230524054647.1093758-1-richard.henderson@linaro.org/
>
> But v1 isn't particularly complet or korrect.
>
> Disconnect fpstate from sigframe, just like the kernel does.
> Return the separate portions of the frame from get_sigframe.
> Alter all of the target fpu routines to access memory that
> has already been translated and sized.
With the exception of patch 22, and with small nits in patches 1/19/23:
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
>
> r~
>
>
> Richard Henderson (28):
> target/i386: Add tcg/access.[ch]
> target/i386: Convert do_fldt, do_fstt to X86Access
> target/i386: Convert helper_{fbld,fbst}_ST0 to X86Access
> target/i386: Convert do_fldenv to X86Access
> target/i386: Convert do_fstenv to X86Access
> target/i386: Convert do_fsave, do_frstor to X86Access
> target/i386: Convert do_xsave_{fpu,mxcr,sse} to X86Access
> target/i386: Convert do_xrstor_{fpu,mxcr,sse} to X86Access
> tagret/i386: Convert do_fxsave, do_fxrstor to X86Access
> target/i386: Convert do_xsave_* to X86Access
> target/i386: Convert do_xrstor_* to X86Access
> target/i386: Split out do_xsave_chk
> target/i386: Add rbfm argument to cpu_x86_{xsave,xrstor}
> target/i386: Add {hw,sw}_reserved to X86LegacyXSaveArea
> linux-user/i386: Drop xfeatures_size from sigcontext arithmetic
> linux-user/i386: Remove xfeatures from target_fpstate_fxsave
> linux-user/i386: Replace target_fpstate_fxsave with X86LegacyXSaveArea
> linux-user/i386: Split out struct target_fregs_state
> linux-user/i386: Fix -mregparm=3 for signal delivery
> linux-user/i386: Return boolean success from restore_sigcontext
> linux-user/i386: Return boolean success from xrstor_sigcontext
> linux-user/i386: Fix allocation and alignment of fp state
> target/i386: Honor xfeatures in xrstor_sigcontext
> target/i386: Convert do_xsave to X86Access
> target/i386: Convert do_xrstor to X86Access
> target/i386: Pass host pointer and size to cpu_x86_{fsave,frstor}
> target/i386: Pass host pointer and size to cpu_x86_{fxsave,fxrstor}
> target/i386: Pass host pointer and size to cpu_x86_{xsave,xrstor}
>
> target/i386/cpu.h | 57 ++-
> target/i386/tcg/access.h | 40 ++
> linux-user/i386/signal.c | 669 ++++++++++++++++++-------------
> target/i386/tcg/access.c | 160 ++++++++
> target/i386/tcg/fpu_helper.c | 561 ++++++++++++++++----------
> tests/tcg/x86_64/test-1648.c | 33 ++
> target/i386/tcg/meson.build | 1 +
> tests/tcg/x86_64/Makefile.target | 1 +
> 8 files changed, 1014 insertions(+), 508 deletions(-)
> create mode 100644 target/i386/tcg/access.h
> create mode 100644 target/i386/tcg/access.c
> create mode 100644 tests/tcg/x86_64/test-1648.c
>
^ permalink raw reply [flat|nested] 36+ messages in thread