[PATCH v3 0/2] accel/tcg: Improvements to atomic128.h

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h
@ 2023-05-24 18:32 Richard Henderson
  2023-05-24 18:32 ` [PATCH v3 1/2] meson: Split test for __int128_t type from __int128_t arithmetic Richard Henderson
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Richard Henderson @ 2023-05-24 18:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, alex.bennee

Changes for v3:
  * Most of the v2 patch set merged, except x86_64 atomic128-ldst.h,
    which failed testing with clang-11 with debian 11.

  * New patch to change __int128_t detection.

  * This in turn enabled CONFIG_ATOMIC128, which was not ideal.
    This clang bug/mis-feature of using a cmpxchg sequence for
    implementing __atomic_load_n was already noted for aarch64,
    so I should have expected it would also be true for x86_64.
    Given that I am adding inline assembly for CPUINFO_ATOMIC_VMOVDQA
    anyway, this isn't a big deal, but I did need to adjust the ifdefs.


r~


Richard Henderson (2):
  meson: Split test for __int128_t type from __int128_t arithmetic
  qemu/atomic128: Add x86_64 atomic128-ldst.h

 meson.build                               | 15 +++--
 host/include/x86_64/host/atomic128-ldst.h | 68 +++++++++++++++++++++++
 include/qemu/int128.h                     |  4 +-
 3 files changed, 80 insertions(+), 7 deletions(-)
 create mode 100644 host/include/x86_64/host/atomic128-ldst.h

-- 
2.34.1



^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v3 1/2] meson: Split test for __int128_t type from __int128_t arithmetic
  2023-05-24 18:32 [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
@ 2023-05-24 18:32 ` Richard Henderson
  2023-05-24 18:32 ` [PATCH v3 2/2] qemu/atomic128: Add x86_64 atomic128-ldst.h Richard Henderson
  2023-05-26  0:24 ` [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2023-05-24 18:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, alex.bennee

Older versions of clang have missing runtime functions for arithmetic
with -fsanitize=undefined (see 464e3671f9d5c), so we cannot use
__int128_t for implementing Int128.  But the __int128_t type is present,
data movement works, and it can be used for atomic128.

Probe for both CONFIG_INT128_TYPE and CONFIG_INT128, adjust
qemu/int128.h to define Int128Alias if CONFIG_INT128_TYPE,
and adjust the meson probe for atomics to use has_int128_type.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 meson.build           | 15 ++++++++++-----
 include/qemu/int128.h |  4 ++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/meson.build b/meson.build
index ef181ff2df..1f73c68a41 100644
--- a/meson.build
+++ b/meson.build
@@ -2536,7 +2536,13 @@ config_host_data.set('CONFIG_ATOMIC64', cc.links('''
     return 0;
   }'''))
 
-has_int128 = cc.links('''
+has_int128_type = cc.compiles('''
+  __int128_t a;
+  __uint128_t b;
+  int main(void) { b = a; }''')
+config_host_data.set('CONFIG_INT128_TYPE', has_int128_type)
+
+has_int128 = has_int128_type and cc.links('''
   __int128_t a;
   __uint128_t b;
   int main (void) {
@@ -2545,10 +2551,9 @@ has_int128 = cc.links('''
     a = a * a;
     return 0;
   }''')
-
 config_host_data.set('CONFIG_INT128', has_int128)
 
-if has_int128
+if has_int128_type
   # "do we have 128-bit atomics which are handled inline and specifically not
   # via libatomic". The reason we can't use libatomic is documented in the
   # comment starting "GCC is a house divided" in include/qemu/atomic128.h.
@@ -2557,7 +2562,7 @@ if has_int128
   # __alignof(unsigned __int128) for the host.
   atomic_test_128 = '''
     int main(int ac, char **av) {
-      unsigned __int128 *p = __builtin_assume_aligned(av[ac - 1], 16);
+      __uint128_t *p = __builtin_assume_aligned(av[ac - 1], 16);
       p[1] = __atomic_load_n(&p[0], __ATOMIC_RELAXED);
       __atomic_store_n(&p[2], p[3], __ATOMIC_RELAXED);
       __atomic_compare_exchange_n(&p[4], &p[5], p[6], 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
@@ -2579,7 +2584,7 @@ if has_int128
       config_host_data.set('CONFIG_CMPXCHG128', cc.links('''
         int main(void)
         {
-          unsigned __int128 x = 0, y = 0;
+          __uint128_t x = 0, y = 0;
           __sync_val_compare_and_swap_16(&x, y, x);
           return 0;
         }
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index 9e46cfaefc..73624e8be7 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -481,7 +481,7 @@ static inline void bswap128s(Int128 *s)
  * a possible structure and the native types.  Ease parameter passing
  * via use of the transparent union extension.
  */
-#ifdef CONFIG_INT128
+#ifdef CONFIG_INT128_TYPE
 typedef union {
     __uint128_t u;
     __int128_t i;
@@ -489,6 +489,6 @@ typedef union {
 } Int128Alias __attribute__((transparent_union));
 #else
 typedef Int128 Int128Alias;
-#endif /* CONFIG_INT128 */
+#endif /* CONFIG_INT128_TYPE */
 
 #endif /* INT128_H */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v3 2/2] qemu/atomic128: Add x86_64 atomic128-ldst.h
  2023-05-24 18:32 [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
  2023-05-24 18:32 ` [PATCH v3 1/2] meson: Split test for __int128_t type from __int128_t arithmetic Richard Henderson
@ 2023-05-24 18:32 ` Richard Henderson
  2023-05-26  0:24 ` [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2023-05-24 18:32 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, alex.bennee

With CPUINFO_ATOMIC_VMOVDQA, we can perform proper atomic
load/store without cmpxchg16b.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 host/include/x86_64/host/atomic128-ldst.h | 68 +++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 host/include/x86_64/host/atomic128-ldst.h

diff --git a/host/include/x86_64/host/atomic128-ldst.h b/host/include/x86_64/host/atomic128-ldst.h
new file mode 100644
index 0000000000..adc9332f91
--- /dev/null
+++ b/host/include/x86_64/host/atomic128-ldst.h
@@ -0,0 +1,68 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ * Load/store for 128-bit atomic operations, x86_64 version.
+ *
+ * Copyright (C) 2023 Linaro, Ltd.
+ *
+ * See docs/devel/atomics.rst for discussion about the guarantees each
+ * atomic primitive is meant to provide.
+ */
+
+#ifndef X86_64_ATOMIC128_LDST_H
+#define X86_64_ATOMIC128_LDST_H
+
+#ifdef CONFIG_INT128_TYPE
+#include "host/cpuinfo.h"
+#include "tcg/debug-assert.h"
+
+/*
+ * Through clang 16, with -mcx16, __atomic_load_n is incorrectly
+ * expanded to a read-write operation: lock cmpxchg16b.
+ */
+
+#define HAVE_ATOMIC128_RO  likely(cpuinfo & CPUINFO_ATOMIC_VMOVDQA)
+#define HAVE_ATOMIC128_RW  1
+
+static inline Int128 atomic16_read_ro(const Int128 *ptr)
+{
+    Int128Alias r;
+
+    tcg_debug_assert(HAVE_ATOMIC128_RO);
+    asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr));
+
+    return r.s;
+}
+
+static inline Int128 atomic16_read_rw(Int128 *ptr)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias r;
+
+    if (HAVE_ATOMIC128_RO) {
+        asm("vmovdqa %1, %0" : "=x" (r.i) : "m" (*ptr_align));
+    } else {
+        r.i = __sync_val_compare_and_swap_16(ptr_align, 0, 0);
+    }
+    return r.s;
+}
+
+static inline void atomic16_set(Int128 *ptr, Int128 val)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias new = { .s = val };
+
+    if (HAVE_ATOMIC128_RO) {
+        asm("vmovdqa %1, %0" : "=m"(*ptr_align) : "x" (new.i));
+    } else {
+        __int128_t old;
+        do {
+            old = *ptr_align;
+        } while (!__sync_bool_compare_and_swap_16(ptr_align, old, new.i));
+    }
+}
+#else
+/* Provide QEMU_ERROR stubs. */
+#include "host/include/generic/host/atomic128-ldst.h"
+#endif
+
+#endif /* X86_64_ATOMIC128_LDST_H */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h
  2023-05-24 18:32 [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
  2023-05-24 18:32 ` [PATCH v3 1/2] meson: Split test for __int128_t type from __int128_t arithmetic Richard Henderson
  2023-05-24 18:32 ` [PATCH v3 2/2] qemu/atomic128: Add x86_64 atomic128-ldst.h Richard Henderson
@ 2023-05-26  0:24 ` Richard Henderson
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2023-05-26  0:24 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, alex.bennee

On 5/24/23 11:32, Richard Henderson wrote:
> Changes for v3:
>    * Most of the v2 patch set merged, except x86_64 atomic128-ldst.h,
>      which failed testing with clang-11 with debian 11.
> 
>    * New patch to change __int128_t detection.
> 
>    * This in turn enabled CONFIG_ATOMIC128, which was not ideal.
>      This clang bug/mis-feature of using a cmpxchg sequence for
>      implementing __atomic_load_n was already noted for aarch64,
>      so I should have expected it would also be true for x86_64.
>      Given that I am adding inline assembly for CPUINFO_ATOMIC_VMOVDQA
>      anyway, this isn't a big deal, but I did need to adjust the ifdefs.
> 
> 
> r~
> 
> 
> Richard Henderson (2):
>    meson: Split test for __int128_t type from __int128_t arithmetic
>    qemu/atomic128: Add x86_64 atomic128-ldst.h
> 
>   meson.build                               | 15 +++--
>   host/include/x86_64/host/atomic128-ldst.h | 68 +++++++++++++++++++++++
>   include/qemu/int128.h                     |  4 +-
>   3 files changed, 80 insertions(+), 7 deletions(-)
>   create mode 100644 host/include/x86_64/host/atomic128-ldst.h
> 

Superseded by
Message-Id: <20230526002334.1760495-1-richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2023-05-26  0:25 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2023-05-24 18:32 [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson
2023-05-24 18:32 ` [PATCH v3 1/2] meson: Split test for __int128_t type from __int128_t arithmetic Richard Henderson
2023-05-24 18:32 ` [PATCH v3 2/2] qemu/atomic128: Add x86_64 atomic128-ldst.h Richard Henderson
2023-05-26  0:24 ` [PATCH v3 0/2] accel/tcg: Improvements to atomic128.h Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).