qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>
Subject: [PULL 52/53] target/i386: do not access beyond the low 128 bits of SSE registers
Date: Tue, 19 Apr 2022 07:51:08 +0200	[thread overview]
Message-ID: <20220419055109.142788-53-pbonzini@redhat.com> (raw)
In-Reply-To: <20220419055109.142788-1-pbonzini@redhat.com>

The i386 target consolidates all vector registers so that instead of
XMMReg, YMMReg and ZMMReg structs there is a single ZMMReg that can
fit all of SSE, AVX and AVX512.

When TCG copies data to and from the SSE registers, it uses the
full 64-byte width.  This is not a correctness issue because TCG
never lets guest code see beyond the first 128 bits of the ZMM
registers; however, it causes uninitialized stack memory to
make it into the CPU's migration stream.

Fix it by only copying the low 16 bytes of the ZMMReg union into
the destination register.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target/i386/ops_sse.h | 75 +++++++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 28 deletions(-)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 6f1fc174b3..e4d74b814a 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -22,6 +22,7 @@
 
 #if SHIFT == 0
 #define Reg MMXReg
+#define SIZE 8
 #define XMM_ONLY(...)
 #define B(n) MMX_B(n)
 #define W(n) MMX_W(n)
@@ -30,6 +31,7 @@
 #define SUFFIX _mmx
 #else
 #define Reg ZMMReg
+#define SIZE 16
 #define XMM_ONLY(...) __VA_ARGS__
 #define B(n) ZMM_B(n)
 #define W(n) ZMM_W(n)
@@ -38,6 +40,22 @@
 #define SUFFIX _xmm
 #endif
 
+/*
+ * Copy the relevant parts of a Reg value around. In the case where
+ * sizeof(Reg) > SIZE, these helpers operate only on the lower bytes of
+ * a 64 byte ZMMReg, so we must copy only those and keep the top bytes
+ * untouched in the guest-visible destination register.
+ * Note that the "lower bytes" are placed last in memory on big-endian
+ * hosts, which store the vector backwards in memory.  In that case the
+ * copy *starts* at B(SIZE - 1) and ends at B(0), the opposite of
+ * the little-endian case.
+ */
+#if HOST_BIG_ENDIAN
+#define MOVE(d, r) memcpy(&((d).B(SIZE - 1)), &(r).B(SIZE - 1), SIZE)
+#else
+#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
+#endif
+
 void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
     int shift;
@@ -516,7 +534,7 @@ void glue(helper_pshufw, SUFFIX)(Reg *d, Reg *s, int order)
     r.W(1) = s->W((order >> 2) & 3);
     r.W(2) = s->W((order >> 4) & 3);
     r.W(3) = s->W((order >> 6) & 3);
-    *d = r;
+    MOVE(*d, r);
 }
 #else
 void helper_shufps(Reg *d, Reg *s, int order)
@@ -527,7 +545,7 @@ void helper_shufps(Reg *d, Reg *s, int order)
     r.L(1) = d->L((order >> 2) & 3);
     r.L(2) = s->L((order >> 4) & 3);
     r.L(3) = s->L((order >> 6) & 3);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_shufpd(Reg *d, Reg *s, int order)
@@ -536,7 +554,7 @@ void helper_shufpd(Reg *d, Reg *s, int order)
 
     r.Q(0) = d->Q(order & 1);
     r.Q(1) = s->Q((order >> 1) & 1);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order)
@@ -547,7 +565,7 @@ void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order)
     r.L(1) = s->L((order >> 2) & 3);
     r.L(2) = s->L((order >> 4) & 3);
     r.L(3) = s->L((order >> 6) & 3);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order)
@@ -559,7 +577,7 @@ void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order)
     r.W(2) = s->W((order >> 4) & 3);
     r.W(3) = s->W((order >> 6) & 3);
     r.Q(1) = s->Q(1);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order)
@@ -571,7 +589,7 @@ void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order)
     r.W(5) = s->W(4 + ((order >> 2) & 3));
     r.W(6) = s->W(4 + ((order >> 4) & 3));
     r.W(7) = s->W(4 + ((order >> 6) & 3));
-    *d = r;
+    MOVE(*d, r);
 }
 #endif
 
@@ -937,7 +955,7 @@ void helper_haddps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
     r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
     r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
     r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_haddpd(CPUX86State *env, ZMMReg *d, ZMMReg *s)
@@ -946,7 +964,7 @@ void helper_haddpd(CPUX86State *env, ZMMReg *d, ZMMReg *s)
 
     r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
     r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_hsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
@@ -957,7 +975,7 @@ void helper_hsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
     r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
     r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
     r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_hsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s)
@@ -966,7 +984,7 @@ void helper_hsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s)
 
     r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
     r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_addsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s)
@@ -1153,7 +1171,7 @@ void glue(helper_packsswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     r.B(14) = satsb((int16_t)s->W(6));
     r.B(15) = satsb((int16_t)s->W(7));
 #endif
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1180,7 +1198,7 @@ void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     r.B(14) = satub((int16_t)s->W(6));
     r.B(15) = satub((int16_t)s->W(7));
 #endif
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1199,7 +1217,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     r.W(6) = satsw(s->L(2));
     r.W(7) = satsw(s->L(3));
 #endif
-    *d = r;
+    MOVE(*d, r);
 }
 
 #define UNPCK_OP(base_name, base)                                       \
@@ -1227,7 +1245,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
                  r.B(14) = d->B((base << (SHIFT + 2)) + 7);             \
                  r.B(15) = s->B((base << (SHIFT + 2)) + 7);             \
                                                                       ) \
-            *d = r;                                                     \
+        MOVE(*d, r);                                                    \
     }                                                                   \
                                                                         \
     void glue(helper_punpck ## base_name ## wd, SUFFIX)(CPUX86State *env,\
@@ -1245,7 +1263,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
                  r.W(6) = d->W((base << (SHIFT + 1)) + 3);              \
                  r.W(7) = s->W((base << (SHIFT + 1)) + 3);              \
                                                                       ) \
-            *d = r;                                                     \
+            MOVE(*d, r);                                                \
     }                                                                   \
                                                                         \
     void glue(helper_punpck ## base_name ## dq, SUFFIX)(CPUX86State *env,\
@@ -1259,7 +1277,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
                  r.L(2) = d->L((base << SHIFT) + 1);                    \
                  r.L(3) = s->L((base << SHIFT) + 1);                    \
                                                                       ) \
-            *d = r;                                                     \
+            MOVE(*d, r);                                                \
     }                                                                   \
                                                                         \
     XMM_ONLY(                                                           \
@@ -1272,7 +1290,7 @@ void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
                                                                         \
                  r.Q(0) = d->Q(base);                                   \
                  r.Q(1) = s->Q(base);                                   \
-                 *d = r;                                                \
+                 MOVE(*d, r);                                           \
              }                                                          \
                                                                         )
 
@@ -1313,7 +1331,7 @@ void helper_pfacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 
     r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
     r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_pfadd(CPUX86State *env, MMXReg *d, MMXReg *s)
@@ -1378,7 +1396,7 @@ void helper_pfnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 
     r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
     r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
@@ -1387,7 +1405,7 @@ void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s)
 
     r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status);
     r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status);
-    *d = r;
+    MOVE(*d, r);
 }
 
 void helper_pfrcp(CPUX86State *env, MMXReg *d, MMXReg *s)
@@ -1424,7 +1442,7 @@ void helper_pswapd(CPUX86State *env, MMXReg *d, MMXReg *s)
 
     r.MMX_L(0) = s->MMX_L(1);
     r.MMX_L(1) = s->MMX_L(0);
-    *d = r;
+    MOVE(*d, r);
 }
 #endif
 
@@ -1438,7 +1456,7 @@ void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
         r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1)));
     }
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1455,7 +1473,7 @@ void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     XMM_ONLY(r.W(6) = (int16_t)s->W(4) + (int16_t)s->W(5));
     XMM_ONLY(r.W(7) = (int16_t)s->W(6) + (int16_t)s->W(7));
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1467,7 +1485,7 @@ void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     r.L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1);
     XMM_ONLY(r.L(3) = (int32_t)s->L(2) + (int32_t)s->L(3));
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1483,7 +1501,7 @@ void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     XMM_ONLY(r.W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5)));
     XMM_ONLY(r.W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7)));
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
@@ -1585,7 +1603,7 @@ void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
 #undef SHR
     }
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 #define XMM0 (env->xmm_regs[0])
@@ -1718,7 +1736,7 @@ void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
     r.W(5) = satuw((int32_t) s->L(1));
     r.W(6) = satuw((int32_t) s->L(2));
     r.W(7) = satuw((int32_t) s->L(3));
-    *d = r;
+    MOVE(*d, r);
 }
 
 #define FMINSB(d, s) MIN((int8_t)d, (int8_t)s)
@@ -1984,7 +2002,7 @@ void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
         r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3));
     }
 
-    *d = r;
+    MOVE(*d, r);
 }
 
 /* SSE4.2 op helpers */
@@ -2324,3 +2342,4 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s,
 #undef L
 #undef Q
 #undef SUFFIX
+#undef SIZE
-- 
2.35.1




  parent reply	other threads:[~2022-04-19  7:56 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-19  5:50 [PULL for-7.1 00/53] Misc pull request for QEMU 7.1 Paolo Bonzini
2022-04-19  5:50 ` [PULL 01/53] qapi, target/i386/sev: Add cpu0-id to query-sev-capabilities Paolo Bonzini
2022-04-19  7:16   ` Dov Murik
2022-04-20 19:06     ` Dov Murik
2022-04-19  5:50 ` [PULL 02/53] qtest: replace gettimeofday with GTimer Paolo Bonzini
2022-04-19  5:50 ` [PULL 03/53] qga: replace qemu_gettimeofday() with g_get_real_time() Paolo Bonzini
2022-04-19  5:50 ` [PULL 04/53] Replace " Paolo Bonzini
2022-04-19  5:50 ` [PULL 05/53] oslib: drop qemu_gettimeofday() Paolo Bonzini
2022-04-19  5:50 ` [PULL 06/53] meson: use chardev_ss dependencies Paolo Bonzini
2022-04-19  5:50 ` [PULL 07/53] meson: add util dependency for oslib-posix on freebsd Paolo Bonzini
2022-04-19  5:50 ` [PULL 08/53] meson: remove unneeded py3 Paolo Bonzini
2022-04-19  5:50 ` [PULL 09/53] meson: remove test-qdev-global-props dependency on testqapi Paolo Bonzini
2022-04-19  5:50 ` [PULL 10/53] char: move qemu_openpty_raw from util/ to char/ Paolo Bonzini
2022-04-19  5:50 ` [PULL 11/53] Replace config-time define HOST_WORDS_BIGENDIAN Paolo Bonzini
2022-04-19  5:50 ` [PULL 12/53] Replace TARGET_WORDS_BIGENDIAN Paolo Bonzini
2022-04-19  5:50 ` [PULL 13/53] osdep: poison {HOST,TARGET}_WORDS_BIGENDIAN Paolo Bonzini
2022-04-19  5:50 ` [PULL 14/53] include/qapi: add g_autoptr support for qobject types Paolo Bonzini
2022-04-19  5:50 ` [PULL 15/53] tests: replace free_all() usage with g_auto Paolo Bonzini
2022-04-19  5:50 ` [PULL 16/53] Replace qemu_real_host_page variables with inlined functions Paolo Bonzini
2022-04-19  5:50 ` [PULL 17/53] qga: replace deprecated g_get_current_time() Paolo Bonzini
2022-04-19  5:50 ` [PULL 18/53] error-report: replace deprecated g_get_current_time() with glib >= 2.62 Paolo Bonzini
2022-04-19  5:50 ` [PULL 19/53] util: rename qemu-error.c to match its header name Paolo Bonzini
2022-04-19  5:50 ` [PULL 20/53] error-report: use error_printf() for program prefix Paolo Bonzini
2022-04-19  5:50 ` [PULL 21/53] include: move TFR to osdep.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 22/53] include: move qemu_write_full() declaration " Paolo Bonzini
2022-04-19  5:50 ` [PULL 23/53] include: move qemu_pipe() " Paolo Bonzini
2022-04-19  5:50 ` [PULL 24/53] include: move coroutine IO functions to coroutine.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 25/53] include: move dump_in_progress() to runstate.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 26/53] include: move C/util-related declarations to cutils.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 27/53] include: move cpu_exec* declarations to cpu-common.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 28/53] include: move target page bits declaration to page-vary.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 29/53] include: move progress API to qemu-progress.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 30/53] include: move qemu_get_vm_name() to sysemu.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 31/53] include: move os_*() to os-foo.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 32/53] include: move page_size_init() to include/hw/core/cpu.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 33/53] Move CPU softfloat unions to cpu-float.h Paolo Bonzini
2022-04-19  5:50 ` [PULL 34/53] Move fcntl_setfl() to oslib-posix Paolo Bonzini
2022-04-19  5:50 ` [PULL 35/53] qga: remove explicit environ argument from exec/spawn Paolo Bonzini
2022-04-19  5:50 ` [PULL 36/53] Remove qemu-common.h include from most units Paolo Bonzini
2022-04-19  5:50 ` [PULL 37/53] build-sys: drop ntddscsi.h check Paolo Bonzini
2022-04-19  5:50 ` [PULL 38/53] build-sys: simplify AF_VSOCK check Paolo Bonzini
2022-04-19  5:50 ` [PULL 39/53] whpx: Added support for breakpoints and stepping Paolo Bonzini
2022-04-19  5:50 ` [PULL 40/53] thread-posix: remove the posix semaphore support Paolo Bonzini
2022-04-19  5:50 ` [PULL 41/53] thread-posix: use monotonic clock for QemuCond and QemuSemaphore Paolo Bonzini
2022-04-19  5:50 ` [PULL 42/53] thread-posix: implement Semaphore with QemuCond and QemuMutex Paolo Bonzini
2022-04-19  5:50 ` [PULL 43/53] thread-posix: optimize qemu_sem_timedwait with zero timeout Paolo Bonzini
2022-04-19  5:51 ` [PULL 44/53] hyperv: SControl is optional to enable SynIc Paolo Bonzini
2022-04-19  5:51 ` [PULL 45/53] hyperv: Add definitions for syndbg Paolo Bonzini
2022-04-19  5:51 ` [PULL 46/53] hyperv: Add support to process syndbg commands Paolo Bonzini
2022-04-19  5:51 ` [PULL 47/53] hw: hyperv: Initial commit for Synthetic Debugging device Paolo Bonzini
2022-04-19  5:51 ` [PULL 48/53] s390x: follow qdev tree to detect SCSI device on a CCW bus Paolo Bonzini
2022-04-19  5:51 ` [PULL 49/53] virtio-ccw: move vhost_ccw_scsi to a separate file Paolo Bonzini
2022-04-19  5:51 ` [PULL 50/53] virtio-ccw: move device type declarations to .c files Paolo Bonzini
2022-04-19  5:51 ` [PULL 51/53] virtio-ccw: do not include headers for all virtio devices Paolo Bonzini
2022-04-19  5:51 ` Paolo Bonzini [this message]
2022-04-19  5:51 ` [PULL 53/53] target/i386: Remove unused XMMReg, YMMReg types and CPUState fields Paolo Bonzini
2022-04-19  9:32 ` [PULL for-7.1 00/53] Misc pull request for QEMU 7.1 Peter Maydell
2022-04-20  3:57 ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220419055109.142788-53-pbonzini@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).