qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
@ 2024-06-12 10:55 Alexander Monakov
  2024-06-12 10:55 ` [PATCH 1/5] Revert "host/i386: assume presence of POPCNT" Alexander Monakov
                   ` (5 more replies)
  0 siblings, 6 replies; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 10:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: Richard Henderson, Paolo Bonzini, Alexander Monakov

Hello,

I'm sending straightforward reverts of recent patches that bumped the minimum
required x86 instruction set to SSE4.2. The older chips did not stop working,
and people still test and use new software on older hardware:
https://sourceware.org/bugzilla/show_bug.cgi?id=31867

Considering the very minor gains from the baseline raise, I'm honestly not
sure why it happened. It seems better to let distributions handle that.

Alexander Monakov (5):
  Revert "host/i386: assume presence of POPCNT"
  Revert "host/i386: assume presence of SSSE3"
  Revert "host/i386: assume presence of SSE2"
  Revert "host/i386: assume presence of CMOV"
  Revert "meson: assume x86-64-v2 baseline ISA"

 host/include/i386/host/cpuinfo.h |  3 +++
 meson.build                      | 10 +++-------
 tcg/i386/tcg-target.c.inc        | 15 ++++++++++++++-
 tcg/i386/tcg-target.h            |  5 +++--
 util/bufferiszero.c              |  4 ++--
 util/cpuinfo-i386.c              |  7 +++++--
 6 files changed, 30 insertions(+), 14 deletions(-)

-- 
2.32.0



^ permalink raw reply	[flat|nested] 30+ messages in thread

* [PATCH 1/5] Revert "host/i386: assume presence of POPCNT"
  2024-06-12 10:55 [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Alexander Monakov
@ 2024-06-12 10:55 ` Alexander Monakov
  2024-06-12 10:55 ` [PATCH 2/5] Revert "host/i386: assume presence of SSSE3" Alexander Monakov
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 10:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: Richard Henderson, Paolo Bonzini, Alexander Monakov

This reverts commit 45ccdbcb24baf99667997fac5cf60318e5e7db51.

Revert in preparation for rolling back the x86_64-v2 ISA requirement.

Signed-off-by: Alexander Monakov <amonakov@ispras.ru>
---
 host/include/i386/host/cpuinfo.h | 1 +
 tcg/i386/tcg-target.h            | 5 +++--
 util/cpuinfo-i386.c              | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
index c1e94d75..72f6fad6 100644
--- a/host/include/i386/host/cpuinfo.h
+++ b/host/include/i386/host/cpuinfo.h
@@ -11,6 +11,7 @@
 #define CPUINFO_ALWAYS          (1u << 0)  /* so cpuinfo is nonzero */
 #define CPUINFO_MOVBE           (1u << 2)
 #define CPUINFO_LZCNT           (1u << 3)
+#define CPUINFO_POPCNT          (1u << 4)
 #define CPUINFO_BMI1            (1u << 5)
 #define CPUINFO_BMI2            (1u << 6)
 #define CPUINFO_AVX1            (1u << 9)
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index ecc69827..2f67a97e 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -111,6 +111,7 @@ typedef enum {
 #endif
 
 #define have_bmi1         (cpuinfo & CPUINFO_BMI1)
+#define have_popcnt       (cpuinfo & CPUINFO_POPCNT)
 #define have_avx1         (cpuinfo & CPUINFO_AVX1)
 #define have_avx2         (cpuinfo & CPUINFO_AVX2)
 #define have_movbe        (cpuinfo & CPUINFO_MOVBE)
@@ -142,7 +143,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_clz_i32          1
 #define TCG_TARGET_HAS_ctz_i32          1
-#define TCG_TARGET_HAS_ctpop_i32        1
+#define TCG_TARGET_HAS_ctpop_i32        have_popcnt
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_extract_i32      1
 #define TCG_TARGET_HAS_sextract_i32     1
@@ -177,7 +178,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_clz_i64          1
 #define TCG_TARGET_HAS_ctz_i64          1
-#define TCG_TARGET_HAS_ctpop_i64        1
+#define TCG_TARGET_HAS_ctpop_i64        have_popcnt
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_extract_i64      1
 #define TCG_TARGET_HAS_sextract_i64     0
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
index 8f2694d8..6d474a62 100644
--- a/util/cpuinfo-i386.c
+++ b/util/cpuinfo-i386.c
@@ -35,6 +35,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
         __cpuid(1, a, b, c, d);
 
         info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
+        info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
         info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0);
 
         /* NOTE: our AES support requires SSSE3 (PSHUFB) as well. */
-- 
2.32.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 2/5] Revert "host/i386: assume presence of SSSE3"
  2024-06-12 10:55 [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Alexander Monakov
  2024-06-12 10:55 ` [PATCH 1/5] Revert "host/i386: assume presence of POPCNT" Alexander Monakov
@ 2024-06-12 10:55 ` Alexander Monakov
  2024-06-12 10:55 ` [PATCH 3/5] Revert "host/i386: assume presence of SSE2" Alexander Monakov
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 10:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: Richard Henderson, Paolo Bonzini, Alexander Monakov

This reverts commit 433cd6d94a8256af70a5200f236dc8047c3c1468.

Revert in preparation for rolling back the x86_64-v2 ISA requirement.

Signed-off-by: Alexander Monakov <amonakov@ispras.ru>
---
 util/cpuinfo-i386.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
index 6d474a62..ca74ef04 100644
--- a/util/cpuinfo-i386.c
+++ b/util/cpuinfo-i386.c
@@ -38,8 +38,8 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
         info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
         info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0);
 
-        /* NOTE: our AES support requires SSSE3 (PSHUFB) as well. */
-        info |= (c & bit_AES) ? CPUINFO_AES : 0;
+        /* Our AES support requires PSHUFB as well. */
+        info |= ((c & bit_AES) && (c & bit_SSSE3) ? CPUINFO_AES : 0);
 
         /* For AVX features, we must check available and usable. */
         if ((c & bit_AVX) && (c & bit_OSXSAVE)) {
-- 
2.32.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 3/5] Revert "host/i386: assume presence of SSE2"
  2024-06-12 10:55 [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Alexander Monakov
  2024-06-12 10:55 ` [PATCH 1/5] Revert "host/i386: assume presence of POPCNT" Alexander Monakov
  2024-06-12 10:55 ` [PATCH 2/5] Revert "host/i386: assume presence of SSSE3" Alexander Monakov
@ 2024-06-12 10:55 ` Alexander Monakov
  2024-06-12 10:55 ` [PATCH 4/5] Revert "host/i386: assume presence of CMOV" Alexander Monakov
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 10:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: Richard Henderson, Paolo Bonzini, Alexander Monakov

This reverts commit b18236897ca15c3db1506d8edb9a191dfe51429c.

Revert in preparation for rolling back the x86_64-v2 ISA requirement.

Signed-off-by: Alexander Monakov <amonakov@ispras.ru>
---
 host/include/i386/host/cpuinfo.h | 1 +
 util/bufferiszero.c              | 4 ++--
 util/cpuinfo-i386.c              | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
index 72f6fad6..81771733 100644
--- a/host/include/i386/host/cpuinfo.h
+++ b/host/include/i386/host/cpuinfo.h
@@ -14,6 +14,7 @@
 #define CPUINFO_POPCNT          (1u << 4)
 #define CPUINFO_BMI1            (1u << 5)
 #define CPUINFO_BMI2            (1u << 6)
+#define CPUINFO_SSE2            (1u << 7)
 #define CPUINFO_AVX1            (1u << 9)
 #define CPUINFO_AVX2            (1u << 10)
 #define CPUINFO_AVX512F         (1u << 11)
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index 11c080e0..74864f7b 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -188,14 +188,14 @@ static biz_accel_fn const accel_table[] = {
 
 static unsigned best_accel(void)
 {
-#ifdef CONFIG_AVX2_OPT
     unsigned info = cpuinfo_init();
 
+#ifdef CONFIG_AVX2_OPT
     if (info & CPUINFO_AVX2) {
         return 2;
     }
 #endif
-    return 1;
+    return info & CPUINFO_SSE2 ? 1 : 0;
 }
 
 #elif defined(__aarch64__) && defined(__ARM_NEON)
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
index ca74ef04..90f92a42 100644
--- a/util/cpuinfo-i386.c
+++ b/util/cpuinfo-i386.c
@@ -34,6 +34,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
     if (max >= 1) {
         __cpuid(1, a, b, c, d);
 
+        info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
         info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
         info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
         info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0);
-- 
2.32.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 4/5] Revert "host/i386: assume presence of CMOV"
  2024-06-12 10:55 [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Alexander Monakov
                   ` (2 preceding siblings ...)
  2024-06-12 10:55 ` [PATCH 3/5] Revert "host/i386: assume presence of SSE2" Alexander Monakov
@ 2024-06-12 10:55 ` Alexander Monakov
  2024-06-12 10:55 ` [PATCH 5/5] Revert "meson: assume x86-64-v2 baseline ISA" Alexander Monakov
  2024-06-12 11:04 ` [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Daniel P. Berrangé
  5 siblings, 0 replies; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 10:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: Richard Henderson, Paolo Bonzini, Alexander Monakov

This reverts commit e68e97ce55b3d17af22dd62c3b3dc72f761b0862.

Revert in preparation for rolling back the x86_64-v2 ISA requirement.

Signed-off-by: Alexander Monakov <amonakov@ispras.ru>
---
 host/include/i386/host/cpuinfo.h |  1 +
 tcg/i386/tcg-target.c.inc        | 15 ++++++++++++++-
 util/cpuinfo-i386.c              |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
index 81771733..9386c749 100644
--- a/host/include/i386/host/cpuinfo.h
+++ b/host/include/i386/host/cpuinfo.h
@@ -9,6 +9,7 @@
 /* Digested version of <cpuid.h> */
 
 #define CPUINFO_ALWAYS          (1u << 0)  /* so cpuinfo is nonzero */
+#define CPUINFO_CMOV            (1u << 1)
 #define CPUINFO_MOVBE           (1u << 2)
 #define CPUINFO_LZCNT           (1u << 3)
 #define CPUINFO_POPCNT          (1u << 4)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 9a54ef7f..59235b4f 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -157,6 +157,12 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #define SOFTMMU_RESERVE_REGS \
     (tcg_use_softmmu ? (1 << TCG_REG_L0) | (1 << TCG_REG_L1) : 0)
 
+/* For 64-bit, we always know that CMOV is available.  */
+#if TCG_TARGET_REG_BITS == 64
+# define have_cmov      true
+#else
+# define have_cmov      (cpuinfo & CPUINFO_CMOV)
+#endif
 #define have_bmi2       (cpuinfo & CPUINFO_BMI2)
 #define have_lzcnt      (cpuinfo & CPUINFO_LZCNT)
 
@@ -1809,7 +1815,14 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
 static void tcg_out_cmov(TCGContext *s, int jcc, int rexw,
                          TCGReg dest, TCGReg v1)
 {
-    tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1);
+    if (have_cmov) {
+        tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1);
+    } else {
+        TCGLabel *over = gen_new_label();
+        tcg_out_jxx(s, jcc ^ 1, over, 1);
+        tcg_out_mov(s, TCG_TYPE_I32, dest, v1);
+        tcg_out_label(s, over);
+    }
 }
 
 static void tcg_out_movcond(TCGContext *s, int rexw, TCGCond cond,
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
index 90f92a42..18ab747a 100644
--- a/util/cpuinfo-i386.c
+++ b/util/cpuinfo-i386.c
@@ -34,6 +34,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
     if (max >= 1) {
         __cpuid(1, a, b, c, d);
 
+        info |= (d & bit_CMOV ? CPUINFO_CMOV : 0);
         info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0);
         info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
         info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
-- 
2.32.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 5/5] Revert "meson: assume x86-64-v2 baseline ISA"
  2024-06-12 10:55 [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Alexander Monakov
                   ` (3 preceding siblings ...)
  2024-06-12 10:55 ` [PATCH 4/5] Revert "host/i386: assume presence of CMOV" Alexander Monakov
@ 2024-06-12 10:55 ` Alexander Monakov
  2024-06-12 11:04 ` [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Daniel P. Berrangé
  5 siblings, 0 replies; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 10:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: Richard Henderson, Paolo Bonzini, Alexander Monakov

This reverts commit 294ac64e459aca023f43441651d860980c9784f1.

Reinstate the ability to use QEMU on x86 hosts that do not meet
the x86_64-v2 ISA baseline.

Signed-off-by: Alexander Monakov <amonakov@ispras.ru>
---
 meson.build | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/meson.build b/meson.build
index ec59effc..49962cce 100644
--- a/meson.build
+++ b/meson.build
@@ -336,13 +336,9 @@ if host_arch == 'i386' and not cc.links('''
   qemu_common_flags = ['-march=i486'] + qemu_common_flags
 endif
 
-# Assume x86-64-v2 (minus CMPXCHG16B for 32-bit code)
-if host_arch == 'i386'
-  qemu_common_flags = ['-mfpmath=sse'] + qemu_common_flags
-endif
-if host_arch in ['i386', 'x86_64']
-  qemu_common_flags = ['-mpopcnt', '-msse4.2'] + qemu_common_flags
-endif
+# ??? Only extremely old AMD cpus do not have cmpxchg16b.
+# If we truly care, we should simply detect this case at
+# runtime and generate the fallback to serial emulation.
 if host_arch == 'x86_64'
   qemu_common_flags = ['-mcx16'] + qemu_common_flags
 endif
-- 
2.32.0



^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 10:55 [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Alexander Monakov
                   ` (4 preceding siblings ...)
  2024-06-12 10:55 ` [PATCH 5/5] Revert "meson: assume x86-64-v2 baseline ISA" Alexander Monakov
@ 2024-06-12 11:04 ` Daniel P. Berrangé
  2024-06-12 11:12   ` Paolo Bonzini
  2024-06-12 11:14   ` Alexander Monakov
  5 siblings, 2 replies; 30+ messages in thread
From: Daniel P. Berrangé @ 2024-06-12 11:04 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: qemu-devel, Richard Henderson, Paolo Bonzini

On Wed, Jun 12, 2024 at 01:55:20PM +0300, Alexander Monakov wrote:
> Hello,
> 
> I'm sending straightforward reverts to recent patches that bumped minimum
> required x86 instruction set to SSE4.2. The older chips did not stop working,
> and people still test and use new software on older hardware:
> https://sourceware.org/bugzilla/show_bug.cgi?id=31867
> 
> Considering the very minor gains from the baseline raise, I'm honestly not
> sure why it happened. It seems better to let distributions handle that.

Indeed distros are opinionated about the x86_64 baseline they want
to target.

While RHEL-9 switched to a x86_64-v2 baseline, Fedora has repeatedly
rejected the idea of moving to an x86_64-v2 baseline, wanting to retain
full backwards compat. So this assumption in QEMU is preventing the
distros from satisfying their chosen build target goals.

> Alexander Monakov (5):
>   Revert "host/i386: assume presence of POPCNT"
>   Revert "host/i386: assume presence of SSSE3"
>   Revert "host/i386: assume presence of SSE2"
>   Revert "host/i386: assume presence of CMOV"
>   Revert "meson: assume x86-64-v2 baseline ISA"
> 
>  host/include/i386/host/cpuinfo.h |  3 +++
>  meson.build                      | 10 +++-------
>  tcg/i386/tcg-target.c.inc        | 15 ++++++++++++++-
>  tcg/i386/tcg-target.h            |  5 +++--
>  util/bufferiszero.c              |  4 ++--
>  util/cpuinfo-i386.c              |  7 +++++--
>  6 files changed, 30 insertions(+), 14 deletions(-)
> 
> -- 
> 2.32.0
> 
> 

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:04 ` [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Daniel P. Berrangé
@ 2024-06-12 11:12   ` Paolo Bonzini
  2024-06-12 11:19     ` Alexander Monakov
                       ` (2 more replies)
  2024-06-12 11:14   ` Alexander Monakov
  1 sibling, 3 replies; 30+ messages in thread
From: Paolo Bonzini @ 2024-06-12 11:12 UTC (permalink / raw)
  To: Daniel P. Berrangé; +Cc: Alexander Monakov, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 1:04 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
>
> On Wed, Jun 12, 2024 at 01:55:20PM +0300, Alexander Monakov wrote:
> > Hello,
> >
> > I'm sending straightforward reverts to recent patches that bumped minimum
> > required x86 instruction set to SSE4.2. The older chips did not stop working,
> > and people still test and use new software on older hardware:
> > https://sourceware.org/bugzilla/show_bug.cgi?id=31867
> >
> > Considering the very minor gains from the baseline raise, I'm honestly not
> > sure why it happened. It seems better to let distributions handle that.
>
> Indeed distros are opinionated about the x86_64 baseline they want
> to target.
>
> While RHEL-9 switched to a x86_64-v2 baseline, Fedora has repeatedly
> rejected the idea of moving to an x86_64-v2 baseline, wanting to retain
> full backwards compat. So this assumption in QEMU is preventing the
> distros from satisfying their chosen build target goals.

I didn't do this because of RHEL9, I did it because it's silly that
QEMU cannot use POPCNT and has to waste 2% of the L1 d-cache to
compute the x86 parity flag (and POPCNT was introduced at the same
time as SSE4.2).

Intel x86_64-v2 processors have been around for about 15 years, AMD
for a little less (2011). I'd rather hear from users about the
usecases for running QEMU on such old processors before reverting, as
this does not get in the way of booting/installing distros on old
machines. Unless QEMU is run from within the installation media, which
it isn't, requiring a particular processor family does not prevent
Fedora from being installable on pre-v2 processors.

Paolo



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:04 ` [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Daniel P. Berrangé
  2024-06-12 11:12   ` Paolo Bonzini
@ 2024-06-12 11:14   ` Alexander Monakov
  1 sibling, 0 replies; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 11:14 UTC (permalink / raw)
  To: Daniel P. Berrangé; +Cc: qemu-devel, Richard Henderson, Paolo Bonzini

[-- Attachment #1: Type: text/plain, Size: 1028 bytes --]


On Wed, 12 Jun 2024, Daniel P. Berrangé wrote:

> On Wed, Jun 12, 2024 at 01:55:20PM +0300, Alexander Monakov wrote:
> > Hello,
> > 
> > I'm sending straightforward reverts to recent patches that bumped minimum
> > required x86 instruction set to SSE4.2. The older chips did not stop working,
> > and people still test and use new software on older hardware:
> > https://sourceware.org/bugzilla/show_bug.cgi?id=31867
> > 
> > Considering the very minor gains from the baseline raise, I'm honestly not
> > sure why it happened. It seems better to let distributions handle that.
> 
> Indeed distros are opinionated about the x86_64 baseline they want
> to target.
> 
> While RHEL-9 switched to a x86_64-v2 baseline, Fedora has repeatedly
> rejected the idea of moving to an x86_64-v2 baseline, wanting to retain
> full backwards compat. So this assumption in QEMU is preventing the
> distros from satisfying their chosen build target goals.

So, to make sure I parsed that correctly, you're in support of the reverts?

Alexander

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:12   ` Paolo Bonzini
@ 2024-06-12 11:19     ` Alexander Monakov
  2024-06-12 11:29       ` Paolo Bonzini
  2024-06-12 11:38     ` Daniel P. Berrangé
  2024-06-23 21:27     ` Alexander Monakov
  2 siblings, 1 reply; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 11:19 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Daniel P. Berrangé, qemu-devel, Richard Henderson


On Wed, 12 Jun 2024, Paolo Bonzini wrote:

> I didn't do this because of RHEL9, I did it because it's silly that
> QEMU cannot use POPCNT and has to waste 2% of the L1 d-cache to
> compute the x86 parity flag (and POPCNT was introduced at the same
> time as SSE4.2).

From looking at that POPCNT patch I understood that QEMU detects the
presence of POPCNT at runtime and will only use the fallback when
POPCNT is unavailable. Did I misunderstand?

Alexander


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:19     ` Alexander Monakov
@ 2024-06-12 11:29       ` Paolo Bonzini
  2024-06-12 11:46         ` Alexander Monakov
  0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2024-06-12 11:29 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: Daniel P. Berrangé, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 1:19 PM Alexander Monakov <amonakov@ispras.ru> wrote:
> On Wed, 12 Jun 2024, Paolo Bonzini wrote:
> > I didn't do this because of RHEL9, I did it because it's silly that
> > QEMU cannot use POPCNT and has to waste 2% of the L1 d-cache to
> > compute the x86 parity flag (and POPCNT was introduced at the same
> > time as SSE4.2).
>
> From looking at that POPCNT patch I understood that Qemu detects
> presence of POPCNT at runtime and will only use the fallback when
> POPCNT is unavailable. Did I misunderstand?

-mpopcnt allows GCC to generate the POPCNT instruction for helper
code. Right now we have code like this in
target/i386/tcg/cc_helper_template.h:

    pf = parity_table[(uint8_t)dst];

and it could be instead something like

#if defined __i386__ || defined __x86_64__ || defined __s390x__ || \
    defined __riscv_zbb
static inline unsigned int compute_pf(uint8_t x)
{
    return __builtin_parity(x) * CC_P;
}
#else
extern const uint8_t parity_table[256];
static inline unsigned int compute_pf(uint8_t x)
{
    return parity_table[x];
}
#endif

The code generated for __builtin_parity, if you don't have it
available in hardware, is pretty bad.

Paolo



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:12   ` Paolo Bonzini
  2024-06-12 11:19     ` Alexander Monakov
@ 2024-06-12 11:38     ` Daniel P. Berrangé
  2024-06-12 11:51       ` Paolo Bonzini
  2024-06-23 21:27     ` Alexander Monakov
  2 siblings, 1 reply; 30+ messages in thread
From: Daniel P. Berrangé @ 2024-06-12 11:38 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Alexander Monakov, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 01:12:43PM +0200, Paolo Bonzini wrote:
> On Wed, Jun 12, 2024 at 1:04 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> >
> > On Wed, Jun 12, 2024 at 01:55:20PM +0300, Alexander Monakov wrote:
> > > Hello,
> > >
> > > I'm sending straightforward reverts to recent patches that bumped minimum
> > > required x86 instruction set to SSE4.2. The older chips did not stop working,
> > > and people still test and use new software on older hardware:
> > > https://sourceware.org/bugzilla/show_bug.cgi?id=31867
> > >
> > > Considering the very minor gains from the baseline raise, I'm honestly not
> > > sure why it happened. It seems better to let distributions handle that.
> >
> > Indeed distros are opinionated about the x86_64 baseline they want
> > to target.
> >
> > While RHEL-9 switched to a x86_64-v2 baseline, Fedora has repeatedly
> > rejected the idea of moving to an x86_64-v2 baseline, wanting to retain
> > full backwards compat. So this assumption in QEMU is preventing the
> > distros from satisfying their chosen build target goals.
> 
> I didn't do this because of RHEL9, I did it because it's silly that
> QEMU cannot use POPCNT and has to waste 2% of the L1 d-cache to
> compute the x86 parity flag (and POPCNT was introduced at the same
> time as SSE4.2).
> 
> Intel x86_64-v2 processors have been around for about 15 years, AMD
> for a little less (2011). I'd rather hear from users about the
> usecases for running QEMU on such old processors before reverting, as
> this does not get in the way of booting/installing distros on old
> machines. Unless QEMU is run from within the installation media, which
> it isn't, requiring a particular processor family does not prevent
> Fedora from being installable on pre-v2 processors.

This isn't anything to do with the distro installer. The use case is that
the distro wants all its software to be able to run on the x86_64 baseline
it has chosen to build with.

If we want to use POPCNT in the TCG code, can we not do a runtime check
and selectively build pieces of code with __attribute__((target("popcnt"))),
as we've done historically for the bufferiszero.c code, rather than
changing the entire QEMU baseline?


With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:29       ` Paolo Bonzini
@ 2024-06-12 11:46         ` Alexander Monakov
  2024-06-12 11:58           ` Paolo Bonzini
  0 siblings, 1 reply; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 11:46 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Daniel P. Berrangé, qemu-devel, Richard Henderson

[-- Attachment #1: Type: text/plain, Size: 1715 bytes --]


On Wed, 12 Jun 2024, Paolo Bonzini wrote:

> On Wed, Jun 12, 2024 at 1:19 PM Alexander Monakov <amonakov@ispras.ru> wrote:
> > On Wed, 12 Jun 2024, Paolo Bonzini wrote:
> > > I didn't do this because of RHEL9, I did it because it's silly that
> > > QEMU cannot use POPCNT and has to waste 2% of the L1 d-cache to
> > > compute the x86 parity flag (and POPCNT was introduced at the same
> > > time as SSE4.2).
> >
> > From looking at that POPCNT patch I understood that Qemu detects
> > presence of POPCNT at runtime and will only use the fallback when
> > POPCNT is unavailable. Did I misunderstand?
> 
> -mpopcnt allows GCC to generate the POPCNT instruction for helper
> code. Right now we have code like this in
> target/i386/tcg/cc_helper_template.h:
> 
>     pf = parity_table[(uint8_t)dst];
> 
> and it could be instead something like
> 
> #if defined __i386__ || defined __x86_64__ || defined __s390x__||
> defined __riscv_zbb

GCC also predefines __POPCNT__ when -mpopcnt is active, so that would be
available for ifdef testing like above, but...

> static inline unsigned int compute_pf(uint8_t x)
> {
>     return __builtin_parity(x) * CC_P;
> }
> #else
> extern const uint8_t parity_table[256];
> static inline unsigned int compute_pf(uint8_t x)
> {
>     return parity_table[x];
> }
> #endif
> 
> The code generated for __builtin_parity, if you don't have it
> available in hardware, is pretty bad.

On x86 parity _is_ available in baseline ISA, no? Here's what gcc-14 generates:

        xor     eax, eax
        test    dil, dil
        setnp   al
        sal     eax, 2

and with -mpopcnt:

        movsx   eax, dil
        popcnt  eax, eax
        and     eax, 1
        sal     eax, 2

Alexander

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:38     ` Daniel P. Berrangé
@ 2024-06-12 11:51       ` Paolo Bonzini
  2024-06-12 12:21         ` Daniel P. Berrangé
  2024-06-12 17:00         ` Daniel P. Berrangé
  0 siblings, 2 replies; 30+ messages in thread
From: Paolo Bonzini @ 2024-06-12 11:51 UTC (permalink / raw)
  To: Daniel P. Berrangé; +Cc: Alexander Monakov, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 1:38 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> This isn't anything to do with the distro installer. The use case is that
> the distro wants all its software to be able to run on the x86_64 baseline
> it has chosen to build with.

Sure, and they can patch the packages if their wish is not shared by
upstream. Alternatively they can live with the fact that not all users
will be able to use all packages, which is probably already the case.
Or drop QEMU, I guess. Has FeSCO ever expressed how strict they are
and which of the three options they'd pick?

Either way, this only affects either the QEMU maintainers for the
distro, or the users of QEMU. It's only if the installation media used
QEMU, that this change would be actively blocking usage of the distro
on old processors.

> If we want to use POPCNT in the TCG code, can we not do a runtime check
> and selectively build pieces of code with  __attribute__((target("popcnt"))),
> as we've done historically for the bufferiszero.c code, rather than
> changing the entire QEMU baseline ?

bufferiszero.c has a very quick check in front of the indirect call
and runs for several hundred clock cycles, so the tradeoff is
different there.

I guess that, because these helpers are called by TCG, you wouldn't
pay the price of the indirect call. However, adding all this
infrastructure for 13-15 year old CPUs is not very enthralling.

Paolo



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:46         ` Alexander Monakov
@ 2024-06-12 11:58           ` Paolo Bonzini
  2024-06-12 12:10             ` Alexander Monakov
  0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2024-06-12 11:58 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: Daniel P. Berrangé, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 1:46 PM Alexander Monakov <amonakov@ispras.ru> wrote:
>
>
> On Wed, 12 Jun 2024, Paolo Bonzini wrote:
>
> > On Wed, Jun 12, 2024 at 1:19 PM Alexander Monakov <amonakov@ispras.ru> wrote:
> > > On Wed, 12 Jun 2024, Paolo Bonzini wrote:
> > > > I didn't do this because of RHEL9, I did it because it's silly that
> > > > QEMU cannot use POPCNT and has to waste 2% of the L1 d-cache to
> > > > compute the x86 parity flag (and POPCNT was introduced at the same
> > > > time as SSE4.2).
> > >
> > > From looking at that POPCNT patch I understood that Qemu detects
> > > presence of POPCNT at runtime and will only use the fallback when
> > > POPCNT is unavailable. Did I misunderstand?
> >
> > -mpopcnt allows GCC to generate the POPCNT instruction for helper
> > code. Right now we have code like this in
> > target/i386/tcg/cc_helper_template.h:
> >
> >     pf = parity_table[(uint8_t)dst];
> >
> > and it could be instead something like
> >
> > #if defined __i386__ || defined __x86_64__ || defined __s390x__||
> > defined __riscv_zbb
>
> GCC also predefines __POPCNT__ when -mpopcnt is active, so that would be
> available for ifdef testing like above, but...
>
> > static inline unsigned int compute_pf(uint8_t x)
> > {
> >     return __builtin_parity(x) * CC_P;
> > }
> > #else
> > extern const uint8_t parity_table[256];
> > static inline unsigned int compute_pf(uint8_t x)
> > {
> >     return parity_table[x];
> > }
> > #endif
> >
> > The code generated for __builtin_parity, if you don't have it
> > available in hardware, is pretty bad.
>
> On x86 parity _is_ available in baseline ISA, no? Here's what gcc-14 generates:
>
>         xor     eax, eax
>         test    dil, dil
>         setnp   al
>         sal     eax, 2

Ahah, nice. :) I'm pretty sure that, when I tested "pf =
(__builtin_popcount(x) & 1) * 4;", it was generating a call to
__builtin_popcountsi2.

Still - for something that has a code generator, there _is_ a cost in
supporting old CPUs, so I'd rather avoid reverting this. The glibc bug
that you linked is very different not just because it affected 32-bit
installation media, but also because it was a bug rather than
intentional.

Since you are reporting this issue, how did you find out / what broke for you?

Paolo



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:58           ` Paolo Bonzini
@ 2024-06-12 12:10             ` Alexander Monakov
  2024-06-12 12:13               ` Paolo Bonzini
  0 siblings, 1 reply; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 12:10 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Daniel P. Berrangé, qemu-devel, Richard Henderson


On Wed, 12 Jun 2024, Paolo Bonzini wrote:

> Ahah, nice. :) I'm pretty sure that, when I tested "pf =
> (__builtin_popcount(x) & 1) * 4;", it was generating a call to
> __builtin_popcountsi2.

Why write '__builtin_popcount(x) & 1' when you can write
'__builtin_parity(x)' in the first place? 

> Still - for something that has a code generator, there _is_ a cost in
> supporting old CPUs, so I'd rather avoid reverting this. The glibc bug
> that you linked is very different not just because it affected 32-bit
> installation media, but also because it was a bug rather than
> intentional.
> 
> Since you are reporting this issue, how did you find out / what broke for you?

I found out from the mailing list. My Core2-based desktop would be affected.

Last but not least, I'm sympathetic to the efforts of my distro maintainers,
who I imagine would be put in an uncomfortable position by this change.

Alexander


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 12:10             ` Alexander Monakov
@ 2024-06-12 12:13               ` Paolo Bonzini
  2024-06-12 13:34                 ` Alexander Monakov
  0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2024-06-12 12:13 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: Daniel P. Berrangé, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 2:11 PM Alexander Monakov <amonakov@ispras.ru> wrote:
>
>
> On Wed, 12 Jun 2024, Paolo Bonzini wrote:
>
> > Ahah, nice. :) I'm pretty sure that, when I tested "pf =
> > (__builtin_popcount(x) & 1) * 4;", it was generating a call to
> > __builtin_popcountsi2.
>
> Why write '__builtin_popcount(x) & 1' when you can write
> '__builtin_parity(x)' in the first place?

I don't remember. :) Anyhow, probably I will add __builtin_parity() to
include/qemu/host-utils.h and some kind of #ifdef HAVE_FAST_CTPOP.
Thanks.

> > Still - for something that has a code generator, there _is_ a cost in
> > supporting old CPUs, so I'd rather avoid reverting this. The glibc bug
> > that you linked is very different not just because it affected 32-bit
> > installation media, but also because it was a bug rather than
> > intentional.
> >
> > Since you are reporting this issue, how did you find out / what broke for you?
>
> I found out from the mailing list. My Core2-based desktop would be affected.

Do you run QEMU on it? With KVM or TCG?

Paolo



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:51       ` Paolo Bonzini
@ 2024-06-12 12:21         ` Daniel P. Berrangé
  2024-06-12 15:09           ` Daniel P. Berrangé
  2024-06-12 17:00         ` Daniel P. Berrangé
  1 sibling, 1 reply; 30+ messages in thread
From: Daniel P. Berrangé @ 2024-06-12 12:21 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Alexander Monakov, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 01:51:31PM +0200, Paolo Bonzini wrote:
> On Wed, Jun 12, 2024 at 1:38 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > This isn't anything to do with the distro installer. The use case is that
> > the distro wants all its software to be able to run on the x86_64 baseline
> > it has chosen to build with.
> 
> Sure, and they can patch the packages if their wish is not shared by
> upstream. Alternatively they can live with the fact that not all users
> will be able to use all packages, which is probably already the case.

Yep, there's almost certainly scientific packages that have done
optimizations in their builds. QEMU is slightly more special
though because it is classed as a "critical path" package for
the distro. Even the QEMU linux-user pieces are now critical path,
since they're leveraged by docker & podman for running foreign arch
containers.

> Or drop QEMU, I guess. Has FeSCO ever expressed how strict they are
> and which of the three options they'd pick?

I don't know - I'm going to raise this question to find out if
there's any guidance.

> Either way, this only affects either the QEMU maintainers for the
> distro, or the users of QEMU. It's only if the installation media used
> QEMU, that this change would be actively blocking usage of the distro
> on old processors.



With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 12:13               ` Paolo Bonzini
@ 2024-06-12 13:34                 ` Alexander Monakov
  2024-06-12 13:39                   ` Paolo Bonzini
  0 siblings, 1 reply; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 13:34 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Daniel P. Berrangé, qemu-devel, Richard Henderson


On Wed, 12 Jun 2024, Paolo Bonzini wrote:

> > I found out from the mailing list. My Core2-based desktop would be affected.
> 
> Do you run QEMU on it? With KVM or TCG?

Excuse me? Are you going to ask for SSH access to ensure my computer really
exists and is in working order?

Can you tell me why you never commented on buffer_is_zero improvements, where
v1 was sent in October?  Just trying to understand how you care for 2% of L1D
use but could be ok with those kinds of speedups being dropped on the floor.

Alexander


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 13:34                 ` Alexander Monakov
@ 2024-06-12 13:39                   ` Paolo Bonzini
  2024-06-12 14:27                     ` Alexander Monakov
  0 siblings, 1 reply; 30+ messages in thread
From: Paolo Bonzini @ 2024-06-12 13:39 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: Daniel P. Berrangé, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 3:34 PM Alexander Monakov <amonakov@ispras.ru> wrote:
> On Wed, 12 Jun 2024, Paolo Bonzini wrote:
> > > I found out from the mailing list. My Core2-based desktop would be affected.
> >
> > Do you run QEMU on it? With KVM or TCG?
>
> Excuse me? Are you going to ask for SSH access to ensure my computer really
> exists and is in working order?

Come on. The thing is, I'm not debating the existence of computers
that don't have x86_64-v2, but I *am* debating the usefulness of
making QEMU run on them and any extra information can be interesting.

> Can you tell me why you never commented on buffer_is_zero improvements, where
> v1 was sent in October?  Just trying to understand how you care for 2% of L1D
> use but could be ok with those kinds of speedups be dropped on the floor.

I'm not sure if there is any overlap in the scenarios where
buffer_is_zero performance matters, and x86 emulation. People can care
about thing A but not thing B. If there's anything that you think I
can help reviewing, feel free to let me know offlist.

Paolo



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 13:39                   ` Paolo Bonzini
@ 2024-06-12 14:27                     ` Alexander Monakov
  0 siblings, 0 replies; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 14:27 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Daniel P. Berrangé, qemu-devel, Richard Henderson

[-- Attachment #1: Type: text/plain, Size: 1338 bytes --]


On Wed, 12 Jun 2024, Paolo Bonzini wrote:

> On Wed, Jun 12, 2024 at 3:34 PM Alexander Monakov <amonakov@ispras.ru> wrote:
> > On Wed, 12 Jun 2024, Paolo Bonzini wrote:
> > > > I found out from the mailing list. My Core2-based desktop would be affected.
> > >
> > > Do you run QEMU on it? With KVM or TCG?
> >
> > Excuse me? Are you going to ask for SSH access to ensure my computer really
> > exists and is in working order?
> 
> Come on. The thing is, I'm not debating the existence of computers
> that don't have x86_64-v2, but I *am* debating the usefulness of
> making QEMU run on them and any extra information can be interesting.

I think it will be useful to me, with KVM and TCG both.

> > Can you tell me why you never commented on buffer_is_zero improvements, where
> > v1 was sent in October?  Just trying to understand how you care for 2% of L1D
> > use but could be ok with those kinds of speedups be dropped on the floor.
> 
> I'm not sure if there is any overlap in the scenarios where
> buffer_is_zero performance matters, and x86 emulation. People can care
> about thing A but not thing B. If there's anything that you think I
> can help reviewing, feel free to let me know offlist.

In that case I would've appreciated an early indication you're not interested,
making Cc'ing you on followups unnecessary.

Alexander

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 12:21         ` Daniel P. Berrangé
@ 2024-06-12 15:09           ` Daniel P. Berrangé
  2024-06-12 15:29             ` Paolo Bonzini
                               ` (2 more replies)
  0 siblings, 3 replies; 30+ messages in thread
From: Daniel P. Berrangé @ 2024-06-12 15:09 UTC (permalink / raw)
  To: Paolo Bonzini, Alexander Monakov, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 01:21:26PM +0100, Daniel P. Berrangé wrote:
> On Wed, Jun 12, 2024 at 01:51:31PM +0200, Paolo Bonzini wrote:
> > On Wed, Jun 12, 2024 at 1:38 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > > This isn't anything to do with the distro installer. The use case is that
> > > the distro wants all its software to be able to run on the x86_64 baseline
> > > it has chosen to build with.
> > 
> > Sure, and they can patch the packages if their wish is not shared by
> > upstream. Alternatively they can live with the fact that not all users
> > will be able to use all packages, which is probably already the case.
> 
> Yep, there's almost certainly scientific packages that have done
> optimizations in their builds. QEMU is slightly more special
> though because it is classed as a "critical path" package for
> the distro. Even the QEMU linux-user pieces are now critical path,
> since they're leveraged by docker & podman for running foreign arch
> containers.
> 
> > Or drop QEMU, I guess. Has FeSCO ever expressed how strict they are
> > and which of the three options they'd pick?
> 
> I don't know - i'm going to raise this question to find out if
> there's any guidance.

I learnt that FESCo approved a surprisingly loose rule saying

  "Libraries packaged in Fedora may require ISA extensions,
   however any packaged application must not crash on any
   officially supported architecture, either by providing
   a generic fallback implementation OR by cleanly exiting
   when the requisite hardware support is unavailable."

This might suggest we could put a runtime feature check in main(),
print a warning and then exit(1); however, QEMU has a lot of code
that is triggered from ELF constructors. If we're building the
entirety of the QEMU codebase with extra features enabled, I worry that
the constructors could potentially cause an illegal instruction
crash before main() runs?

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 15:09           ` Daniel P. Berrangé
@ 2024-06-12 15:29             ` Paolo Bonzini
  2024-06-12 15:40             ` Alexander Monakov
  2024-06-12 17:06             ` Daniel P. Berrangé
  2 siblings, 0 replies; 30+ messages in thread
From: Paolo Bonzini @ 2024-06-12 15:29 UTC (permalink / raw)
  To: Daniel P. Berrangé; +Cc: Alexander Monakov, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 5:09 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> This might suggest we could put a runtime feature check in main(),
> print a warning and then exit(1), however, QEMU has alot of code
> that is triggered from ELF constructors. If we're building the
> entire of QEMU codebase with extra features enabled, I worry that
> the constructors could potentially cause a illegal instruction
> crash before main() runs ?

And I learnt that one can simply add -mneeded to the compiler command
line to achieve that, at least on glibc systems:

$ gcc f.c -mneeded -mpopcnt
$ qemu-x86_64 -cpu core2duo ./a.out
./a.out: CPU ISA level is lower than required
$ qemu-x86_64 ./a.out
1234

$ gcc f.c -mneeded
$ qemu-x86_64 -cpu core2duo ./a.out
1234

Using "readelf -n" on the executable unveils the magic:

Displaying notes found in: .note.gnu.property
  Owner                Data size     Description
  GNU                  0x00000030    NT_GNU_PROPERTY_TYPE_0
      Properties: x86 ISA needed: x86-64-baseline, x86-64-v2
    x86 feature used: x86
    x86 ISA used:

I'm actually amazed. :)

Paolo



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 15:09           ` Daniel P. Berrangé
  2024-06-12 15:29             ` Paolo Bonzini
@ 2024-06-12 15:40             ` Alexander Monakov
  2024-06-12 16:24               ` Daniel P. Berrangé
  2024-06-12 17:06             ` Daniel P. Berrangé
  2 siblings, 1 reply; 30+ messages in thread
From: Alexander Monakov @ 2024-06-12 15:40 UTC (permalink / raw)
  To: Daniel P. Berrangé; +Cc: Paolo Bonzini, qemu-devel, Richard Henderson

[-- Attachment #1: Type: text/plain, Size: 943 bytes --]


On Wed, 12 Jun 2024, Daniel P. Berrangé wrote:

> I learnt that FESCo approved a surprisingly loose rule saying
> 
>   "Libraries packaged in Fedora may require ISA extensions,
>    however any packaged application must not crash on any
>    officially supported architecture, either by providing
>    a generic fallback implementation OR by cleanly exiting
>    when the requisite hardware support is unavailable."
> 
> This might suggest we could put a runtime feature check in main(),
> print a warning and then exit(1), however, QEMU has alot of code
> that is triggered from ELF constructors. If we're building the
> entire of QEMU codebase with extra features enabled, I worry that
> the constructors could potentially cause a illegal instruction
> crash before main() runs ?

Are you literally suggesting to find a solution that satisfies the letter
of Fedora rules, and not what's good for the spirit of a wider community?

Alexander

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 15:40             ` Alexander Monakov
@ 2024-06-12 16:24               ` Daniel P. Berrangé
  0 siblings, 0 replies; 30+ messages in thread
From: Daniel P. Berrangé @ 2024-06-12 16:24 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: Paolo Bonzini, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 06:40:09PM +0300, Alexander Monakov wrote:
> 
> On Wed, 12 Jun 2024, Daniel P. Berrangé wrote:
> 
> > I learnt that FESCo approved a surprisingly loose rule saying
> > 
> >   "Libraries packaged in Fedora may require ISA extensions,
> >    however any packaged application must not crash on any
> >    officially supported architecture, either by providing
> >    a generic fallback implementation OR by cleanly exiting
> >    when the requisite hardware support is unavailable."
> > 
> > This might suggest we could put a runtime feature check in main(),
> > print a warning and then exit(1), however, QEMU has alot of code
> > that is triggered from ELF constructors. If we're building the
> > entire of QEMU codebase with extra features enabled, I worry that
> > the constructors could potentially cause a illegal instruction
> > crash before main() runs ?
> 
> Are you literally suggesting to find a solution that satisfies the letter
> of Fedora rules, and not what's good for the spirit of a wider community.

I'm interested in exploring what the options are. Personally I still
think QEMU ought to maintain compat with the original x86_64 ABI, since
very few distros have moved to requiring -v2, but if that doesn't happen
I want to understand the implications for Fedora since that's where I'm
a maintainer.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:51       ` Paolo Bonzini
  2024-06-12 12:21         ` Daniel P. Berrangé
@ 2024-06-12 17:00         ` Daniel P. Berrangé
  2024-06-12 17:08           ` Paolo Bonzini
  1 sibling, 1 reply; 30+ messages in thread
From: Daniel P. Berrangé @ 2024-06-12 17:00 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Alexander Monakov, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 01:51:31PM +0200, Paolo Bonzini wrote:
> On Wed, Jun 12, 2024 at 1:38 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > If we want to use POPCNT in the TCG code, can we not do a runtime check
> > and selectively build pieces of code with  __attribute__((target("popcnt"))),
> > as we've done historically for the bufferiszero.c code, rather than
> > changing the entire QEMU baseline ?
> 
> bufferiszero.c has a very quick check in front of the indirect call
> and runs for several hundred clock cycles, so the tradeoff is
> different there.
> 
> I guess that, because these helpers are called by TCG, you wouldn't
> pay the price of the indirect call. However, adding all this
> infrastructure for 13-15 year old CPUs is not very enthralling.

Ah, so the distinction is that the old code had a runtime check
on 'have_popcnt' (and similar), whereas now that check is eliminated
at compile time, since the condition is a constant.

Rather than re-introducing a runtime check again for everyone, could
we make it a configure-time argument whether to assume x86_64-v2?
So those who are happy with an increased baseline can achieve the
maximum performance with all checks eliminated at compile time,
while still allowing the tradeoff of a dynamic check for those who
prefer compatibility over peak performance?

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 15:09           ` Daniel P. Berrangé
  2024-06-12 15:29             ` Paolo Bonzini
  2024-06-12 15:40             ` Alexander Monakov
@ 2024-06-12 17:06             ` Daniel P. Berrangé
  2 siblings, 0 replies; 30+ messages in thread
From: Daniel P. Berrangé @ 2024-06-12 17:06 UTC (permalink / raw)
  To: Paolo Bonzini, Alexander Monakov, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 04:09:29PM +0100, Daniel P. Berrangé wrote:
> On Wed, Jun 12, 2024 at 01:21:26PM +0100, Daniel P. Berrangé wrote:
> > On Wed, Jun 12, 2024 at 01:51:31PM +0200, Paolo Bonzini wrote:
> > > On Wed, Jun 12, 2024 at 1:38 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > > > This isn't anything to do with the distro installer. The use case is that
> > > > the distro wants all its software to be able to run on the x86_64 baseline
> > > > it has chosen to build with.
> > > 
> > > Sure, and they can patch the packages if their wish is not shared by
> > > upstream. Alternatively they can live with the fact that not all users
> > > will be able to use all packages, which is probably already the case.
> > 
> > Yep, there's almost certainly scientific packages that have done
> > optimizations in their builds. QEMU is slightly more special
> > though because it is classed as a "critical path" package for
> > the distro. Even the QEMU linux-user pieces are now critical path,
> > since they're leveraged by docker & podman for running foreign arch
> > containers.
> > 
> > > Or drop QEMU, I guess. Has FeSCO ever expressed how strict they are
> > > and which of the three options they'd pick?
> > 
> > I don't know - i'm going to raise this question to find out if
> > there's any guidance.
> 
> I learnt that FESCo approved a surprisingly loose rule saying
> 
>   "Libraries packaged in Fedora may require ISA extensions,
>    however any packaged application must not crash on any
>    officially supported architecture, either by providing
>    a generic fallback implementation OR by cleanly exiting
>    when the requisite hardware support is unavailable."
>

..snip..

I queried the looseness of this wording, and it is suggested
it wasn't intended to apply to existing packages, just newly
added ones. By that interpretation it wouldn't be valid for
QEMU, and we'd be pushed towards the revert downstream, to
retain a runtime check for the feature. I really hate the
idea of keeping a revert of these patches downstream though,
as it would be an indefinite rebase headache.

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 17:00         ` Daniel P. Berrangé
@ 2024-06-12 17:08           ` Paolo Bonzini
  0 siblings, 0 replies; 30+ messages in thread
From: Paolo Bonzini @ 2024-06-12 17:08 UTC (permalink / raw)
  To: Daniel P. Berrangé; +Cc: Alexander Monakov, qemu-devel, Richard Henderson

On Wed, Jun 12, 2024 at 7:00 PM Daniel P. Berrangé <berrange@redhat.com> wrote:
> > I guess that, because these helpers are called by TCG, you wouldn't
> > pay the price of the indirect call. However, adding all this
> > infrastructure for 13-15 year old CPUs is not very enthralling.
>
> Rather than re-introducing a runtime check again for everyone, could
> we make it a configure time argument whether to assume x86_64-v2 ?

Fair enough, I'll work on it.

Paolo



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-12 11:12   ` Paolo Bonzini
  2024-06-12 11:19     ` Alexander Monakov
  2024-06-12 11:38     ` Daniel P. Berrangé
@ 2024-06-23 21:27     ` Alexander Monakov
  2024-06-23 22:14       ` Richard Henderson
  2 siblings, 1 reply; 30+ messages in thread
From: Alexander Monakov @ 2024-06-23 21:27 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: Daniel P. Berrangé, qemu-devel, Richard Henderson

Hello,

On Wed, 12 Jun 2024, Paolo Bonzini wrote:

> I didn't do this because of RHEL9, I did it because it's silly that
> QEMU cannot use POPCNT and has to waste 2% of the L1 d-cache to
> compute the x86 parity flag (and POPCNT was introduced at the same
> time as SSE4.2).

I do not see where the 2% figure is coming from: even considering that
the 256-byte LUT may take an extra cache line due to misalignment, 320
bytes is still less than 1% of 32KB L1D size.

More importantly, the way this comment is phrased made me think that Qemu
eagerly computes PF. But the comment in target/i386/cpu.h is saying that
all flags are computed in an on-demand manner. Considering that software
pretty much never uses PF, why would the parity table be resident in L1D?
As far as I can see, the cost is rather a cache miss and perhaps a TLB miss
when PF is computed (mostly when EFLAGS are accessed all together on
context switches I think).

Is there something I'm not seeing?

Thanks.
Alexander


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts
  2024-06-23 21:27     ` Alexander Monakov
@ 2024-06-23 22:14       ` Richard Henderson
  0 siblings, 0 replies; 30+ messages in thread
From: Richard Henderson @ 2024-06-23 22:14 UTC (permalink / raw)
  To: Alexander Monakov, Paolo Bonzini; +Cc: Daniel P. Berrangé, qemu-devel

On 6/23/24 14:27, Alexander Monakov wrote:
> Hello,
> 
> On Wed, 12 Jun 2024, Paolo Bonzini wrote:
> 
>> I didn't do this because of RHEL9, I did it because it's silly that
>> QEMU cannot use POPCNT and has to waste 2% of the L1 d-cache to
>> compute the x86 parity flag (and POPCNT was introduced at the same
>> time as SSE4.2).
> 
> I do not see where the 2% figure is coming from: even considering that
> the 256-byte LUT may take an extra cache line due to misalignment, 320
> bytes is still less than 1% of 32KB L1D size.
> 
> More importantly, the way this comment is phrased made me think that Qemu
> eagerly computes PF. But the comment in target/i386/cpu.h is saying that
> all flags are computed in an on-demand manner. Considering that software
> pretty much never uses PF, why would the parity table be resident in L1D?
> As far as I can see, the cost is rather a cache miss and perhaps a TLB miss
> when PF is computed (mostly when EFLAGS are accessed all together on
> context switches I think).
> 
> Is there something I'm not seeing?

We delay flags computation until they're needed (since flags are often overwritten by the 
very next instruction), but when we do, we compute all of the flags.  So PF is computed at 
that point, even if PF itself will never be read.


r~


^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2024-06-23 22:15 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-12 10:55 [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Alexander Monakov
2024-06-12 10:55 ` [PATCH 1/5] Revert "host/i386: assume presence of POPCNT" Alexander Monakov
2024-06-12 10:55 ` [PATCH 2/5] Revert "host/i386: assume presence of SSSE3" Alexander Monakov
2024-06-12 10:55 ` [PATCH 3/5] Revert "host/i386: assume presence of SSE2" Alexander Monakov
2024-06-12 10:55 ` [PATCH 4/5] Revert "host/i386: assume presence of CMOV" Alexander Monakov
2024-06-12 10:55 ` [PATCH 5/5] Revert "meson: assume x86-64-v2 baseline ISA" Alexander Monakov
2024-06-12 11:04 ` [PATCH 0/5] Reinstate ability to use Qemu on pre-SSE4.1 x86 hosts Daniel P. Berrangé
2024-06-12 11:12   ` Paolo Bonzini
2024-06-12 11:19     ` Alexander Monakov
2024-06-12 11:29       ` Paolo Bonzini
2024-06-12 11:46         ` Alexander Monakov
2024-06-12 11:58           ` Paolo Bonzini
2024-06-12 12:10             ` Alexander Monakov
2024-06-12 12:13               ` Paolo Bonzini
2024-06-12 13:34                 ` Alexander Monakov
2024-06-12 13:39                   ` Paolo Bonzini
2024-06-12 14:27                     ` Alexander Monakov
2024-06-12 11:38     ` Daniel P. Berrangé
2024-06-12 11:51       ` Paolo Bonzini
2024-06-12 12:21         ` Daniel P. Berrangé
2024-06-12 15:09           ` Daniel P. Berrangé
2024-06-12 15:29             ` Paolo Bonzini
2024-06-12 15:40             ` Alexander Monakov
2024-06-12 16:24               ` Daniel P. Berrangé
2024-06-12 17:06             ` Daniel P. Berrangé
2024-06-12 17:00         ` Daniel P. Berrangé
2024-06-12 17:08           ` Paolo Bonzini
2024-06-23 21:27     ` Alexander Monakov
2024-06-23 22:14       ` Richard Henderson
2024-06-12 11:14   ` Alexander Monakov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).