[PATCH] include/qemu/atomic128: Support 16-byte atomic read/write for Intel AVX

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] include/qemu/atomic128: Support 16-byte atomic read/write for Intel AVX
@ 2022-10-08 15:36 Richard Henderson
  2022-10-08 15:43 ` Richard Henderson
  2022-10-10  9:49 ` Peter Maydell
  0 siblings, 2 replies; 4+ messages in thread
From: Richard Henderson @ 2022-10-08 15:36 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini

Intel has now given guarantees about the atomicity of SSE read
and write instructions on cpus supporting AVX.  We can use these
instead of the much slower cmpxchg16b.

Derived from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---

Paolo, we probably ought to modify gen_ld[oy]_env_A0 to match,
at least with CF_PARALLEL set.


r~

---
 include/qemu/atomic128.h | 44 ++++++++++++++++++++++++++
 util/atomic128.c         | 67 ++++++++++++++++++++++++++++++++++++++++
 util/meson.build         |  1 +
 3 files changed, 112 insertions(+)
 create mode 100644 util/atomic128.c

diff --git a/include/qemu/atomic128.h b/include/qemu/atomic128.h
index adb9a1a260..d179c05ede 100644
--- a/include/qemu/atomic128.h
+++ b/include/qemu/atomic128.h
@@ -127,6 +127,50 @@ static inline void atomic16_set(Int128 *ptr, Int128 val)
         : [l] "r"(l), [h] "r"(h));
 }
 
+# define HAVE_ATOMIC128 1
+#elif !defined(CONFIG_USER_ONLY) && defined(__x86_64__)
+/*
+ * The latest Intel SDM has added:
+ *     Processors that enumerate support for Intel® AVX (by setting
+ *     the feature flag CPUID.01H:ECX.AVX[bit 28]) guarantee that the
+ *     16-byte memory operations performed by the following instructions
+ *     will always be carried out atomically:
+ *      - MOVAPD, MOVAPS, and MOVDQA.
+ *      - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
+ *      - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
+ *        with EVEX.128 and k0 (masking disabled).
+ *    Note that these instructions require the linear addresses of their
+ *    memory operands to be 16-byte aligned.
+ *
+ * We do not yet have a similar guarantee from AMD, so we detect this
+ * at runtime rather than assuming the fact when __AVX__ is defined.
+ */
+extern bool have_atomic128;
+
+static inline Int128 atomic16_read(Int128 *ptr)
+{
+    Int128 ret;
+    if (have_atomic128) {
+        asm("vmovdqa %1, %0" : "=x" (ret) : "m" (*ptr));
+    } else {
+        ret = atomic16_cmpxchg(ptr, 0, 0);
+    }
+    return ret;
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    if (have_atomic128) {
+        asm("vmovdqa %1, %0" : "=m" (*ptr) : "x" (val));
+    } else {
+        Int128 old = *ptr, cmp;
+        do {
+            cmp = old;
+            old = atomic16_cmpxchg(ptr, cmp, val);
+        } while (old != cmp);
+    }
+}
+
 # define HAVE_ATOMIC128 1
 #elif !defined(CONFIG_USER_ONLY) && HAVE_CMPXCHG128
 static inline Int128 atomic16_read(Int128 *ptr)
diff --git a/util/atomic128.c b/util/atomic128.c
new file mode 100644
index 0000000000..55863ce9bd
--- /dev/null
+++ b/util/atomic128.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2022, Linaro Ltd.
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/atomic128.h"
+
+#ifdef __x86_64__
+#include "qemu/cpuid.h"
+
+#ifndef signature_INTEL_ecx
+/* "Genu ineI ntel" */
+#define signature_INTEL_ebx     0x756e6547
+#define signature_INTEL_edx     0x49656e69
+#define signature_INTEL_ecx     0x6c65746e
+#endif
+
+/*
+ * The latest Intel SDM has added:
+ *     Processors that enumerate support for Intel® AVX (by setting
+ *     the feature flag CPUID.01H:ECX.AVX[bit 28]) guarantee that the
+ *     16-byte memory operations performed by the following instructions
+ *     will always be carried out atomically:
+ *      - MOVAPD, MOVAPS, and MOVDQA.
+ *      - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
+ *      - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded
+ *        with EVEX.128 and k0 (masking disabled).
+ *    Note that these instructions require the linear addresses of their
+ *    memory operands to be 16-byte aligned.
+ *
+ * We do not yet have a similar guarantee from AMD, so we detect this
+ * at runtime rather than assuming the fact when __AVX__ is defined.
+ */
+bool have_atomic128;
+
+static void __attribute__((constructor))
+init_have_atomic128(void)
+{
+    unsigned int a, b, c, d, xcrl, xcrh;
+
+    __cpuid(0, a, b, c, d);
+    if (a < 1) {
+        return; /* AVX leaf not present */
+    }
+    if (c != signature_INTEL_ecx) {
+        return; /* Not an Intel product */
+    }
+
+    __cpuid(1, a, b, c, d);
+    if ((c & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE)) {
+        return; /* AVX not present or XSAVE not enabled by OS */
+    }
+
+    /*
+     * The xgetbv instruction is not available to older versions of
+     * the assembler, so we encode the instruction manually.
+     */
+    asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
+    if ((xcrl & 6) != 6) {
+        return; /* AVX not enabled by OS */
+    }
+
+    have_atomic128 = true;
+}
+#endif /* __x86_64__ */
diff --git a/util/meson.build b/util/meson.build
index 5e282130df..4b29b719a8 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -2,6 +2,7 @@ util_ss.add(files('osdep.c', 'cutils.c', 'unicode.c', 'qemu-timer-common.c'))
 if not config_host_data.get('CONFIG_ATOMIC64')
   util_ss.add(files('atomic64.c'))
 endif
+util_ss.add(when: 'CONFIG_SOFTMMU', if_true: files('atomic128.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('aio-posix.c'))
 util_ss.add(when: 'CONFIG_POSIX', if_true: files('fdmon-poll.c'))
 if config_host_data.get('CONFIG_EPOLL_CREATE1')
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] include/qemu/atomic128: Support 16-byte atomic read/write for Intel AVX
  2022-10-08 15:36 [PATCH] include/qemu/atomic128: Support 16-byte atomic read/write for Intel AVX Richard Henderson
@ 2022-10-08 15:43 ` Richard Henderson
  2022-10-10  9:49 ` Peter Maydell
  1 sibling, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2022-10-08 15:43 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini

On 10/8/22 08:36, Richard Henderson wrote:
> Intel has now given guarantees about the atomicity of SSE read
> and write instructions on cpus supporting AVX.  We can use these
> instead of the much slower cmpxchg16b.
> 
> Derived from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> 
> Paolo, we probably ought to modify gen_ld[oy]_env_A0 to match,
> at least with CF_PARALLEL set.

Or, rather, just gen_ldo/sto.
Curiously, there are no guarantees at all for

   vmovdqa mem, %ymmN


r~


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] include/qemu/atomic128: Support 16-byte atomic read/write for Intel AVX
  2022-10-08 15:36 [PATCH] include/qemu/atomic128: Support 16-byte atomic read/write for Intel AVX Richard Henderson
  2022-10-08 15:43 ` Richard Henderson
@ 2022-10-10  9:49 ` Peter Maydell
  2022-10-10 14:02   ` Richard Henderson
  1 sibling, 1 reply; 4+ messages in thread
From: Peter Maydell @ 2022-10-10  9:49 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, pbonzini

On Sat, 8 Oct 2022 at 16:38, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Intel has now given guarantees about the atomicity of SSE read
> and write instructions on cpus supporting AVX.  We can use these
> instead of the much slower cmpxchg16b.
>
> Derived from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> +    /*
> +     * The xgetbv instruction is not available to older versions of
> +     * the assembler, so we encode the instruction manually.
> +     */
> +    asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));

This would make the third place in the tree where we hand-code
this asm instruction (we already do it in the xgetbv() function
in target/i386/hvf/x86_cpuid.c and opencoded in tcg_target_init()):
can we abstract this out somehow, please?

(There is also a just-written-out "xgetbv" in init_cpuid_cache(),
but that one's guarded by ifdefs so presumably OK.)

thanks
-- PMM


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] include/qemu/atomic128: Support 16-byte atomic read/write for Intel AVX
  2022-10-10  9:49 ` Peter Maydell
@ 2022-10-10 14:02   ` Richard Henderson
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2022-10-10 14:02 UTC (permalink / raw)
  To: Peter Maydell; +Cc: qemu-devel, pbonzini

On 10/10/22 02:49, Peter Maydell wrote:
> On Sat, 8 Oct 2022 at 16:38, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> Intel has now given guarantees about the atomicity of SSE read
>> and write instructions on cpus supporting AVX.  We can use these
>> instead of the much slower cmpxchg16b.
>>
>> Derived from https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104688
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> 
>> +    /*
>> +     * The xgetbv instruction is not available to older versions of
>> +     * the assembler, so we encode the instruction manually.
>> +     */
>> +    asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
> 
> This would make the third place in the tree where we hand-code
> this asm instruction (we already do it in the xgetbv() function
> in target/i386/hvf/x86_cpuid.c and opencoded in tcg_target_init()):
> can we abstract this out somehow, please?
> 
> (There is also a just-written-out "xgetbv" in init_cpuid_cache(),
> but that one's guarded by ifdefs so presumably OK.)

It's also possible that the Xcode revision that didn't know xgetbv is now unsupported. 
Something to check, I suppose.


r~



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-10-10 14:05 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2022-10-08 15:36 [PATCH] include/qemu/atomic128: Support 16-byte atomic read/write for Intel AVX Richard Henderson
2022-10-08 15:43 ` Richard Henderson
2022-10-10  9:49 ` Peter Maydell
2022-10-10 14:02   ` Richard Henderson

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).