qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Robert Hoo <robert.hu@linux.intel.com>
To: qemu-devel@nongnu.org, pbonzini@redhat.com, laurent@vivier.eu,
	philmd@redhat.com, berrange@redhat.com
Cc: robert.hu@intel.com, Robert Hoo <robert.hu@linux.intel.com>
Subject: [PATCH 2/2] util: add util function buffer_zero_avx512()
Date: Thu, 13 Feb 2020 15:52:59 +0800	[thread overview]
Message-ID: <1581580379-54109-3-git-send-email-robert.hu@linux.intel.com> (raw)
In-Reply-To: <1581580379-54109-1-git-send-email-robert.hu@linux.intel.com>

And initialize buffer_is_zero() with it, when Intel AVX512F is
available on host.

This function uses Intel AVX-512 Foundation (AVX512F) instructions, which
outperform the previous AVX2 instructions.

Signed-off-by: Robert Hoo <robert.hu@linux.intel.com>
---
 include/qemu/cpuid.h |  3 +++
 util/bufferiszero.c  | 56 +++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/include/qemu/cpuid.h b/include/qemu/cpuid.h
index 6930170..09fc245 100644
--- a/include/qemu/cpuid.h
+++ b/include/qemu/cpuid.h
@@ -45,6 +45,9 @@
 #ifndef bit_AVX2
 #define bit_AVX2        (1 << 5)
 #endif
+#ifndef bit_AVX512F
+#define bit_AVX512F        (1 << 16)   /* AVX-512 Foundation feature flag */
+#endif
 #ifndef bit_BMI2
 #define bit_BMI2        (1 << 8)
 #endif
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index bfb2605..cbb854a 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -187,12 +187,54 @@ buffer_zero_avx2(const void *buf, size_t len)
 #pragma GCC pop_options
 #endif /* CONFIG_AVX2_OPT */
 
+#ifdef CONFIG_AVX512F_OPT
+#pragma GCC push_options
+#pragma GCC target("avx512f")
+#include <immintrin.h>
+
+/* Return true iff all LEN bytes at BUF are zero, using AVX512F loads.  */
+static bool
+buffer_zero_avx512(const void *buf, size_t len)
+{
+    __m512i t;
+    __m512i *p, *e;
+
+    /* The tail below reads the final 256 bytes, so LEN must cover them.  */
+    if (unlikely(len < 256)) {
+        return buffer_zero_int(buf, len);
+    }
+    /* Begin with an unaligned head of 64 bytes.  */
+    t = _mm512_loadu_si512(buf);
+    p = (__m512i *)(((uintptr_t)buf + 5 * 64) & -64);
+    e = (__m512i *)(((uintptr_t)buf + len) & -64);
+
+    /* 64-byte aligned blocks of 256; <= leaves no gap before the tail.  */
+    while (p <= e) {
+        __builtin_prefetch(p);
+        if (unlikely(_mm512_test_epi64_mask(t, t))) {
+            return false;
+        }
+        t = p[-4] | p[-3] | p[-2] | p[-1];
+        p += 4;
+    }
+    /* Finish with the last 256 bytes, unaligned.  */
+    t |= _mm512_loadu_si512(buf + len - 4 * 64);
+    t |= _mm512_loadu_si512(buf + len - 3 * 64);
+    t |= _mm512_loadu_si512(buf + len - 2 * 64);
+    t |= _mm512_loadu_si512(buf + len - 1 * 64);
+    return !_mm512_test_epi64_mask(t, t);
+}
+#pragma GCC pop_options
+#endif
+
+
 /* Note that for test_buffer_is_zero_next_accel, the most preferred
  * ISA must have the least significant bit.
  */
-#define CACHE_AVX2    1
-#define CACHE_SSE4    2
-#define CACHE_SSE2    4
+#define CACHE_AVX512F 1
+#define CACHE_AVX2    2
+#define CACHE_SSE4    4
+#define CACHE_SSE2    8  /* each flag is its own bit: tested via cache & FLAG */
 
 /* Make sure that these variables are appropriately initialized when
  * SSE2 is enabled on the compiler command-line, but the compiler is
@@ -226,6 +268,11 @@ static void init_accel(unsigned cache)
         fn = buffer_zero_avx2;
     }
 #endif
+#ifdef CONFIG_AVX512F_OPT
+    if (cache & CACHE_AVX512F) {
+        fn = buffer_zero_avx512; /* assigned after AVX2, so it overrides it */
+    }
+#endif
     buffer_accel = fn;
 }
 
@@ -255,6 +302,9 @@ static void __attribute__((constructor)) init_cpuid_cache(void)
             if ((bv & 6) == 6 && (b & bit_AVX2)) {
                 cache |= CACHE_AVX2;
             }
+            if ((bv & 0xe6) == 0xe6 && (b & bit_AVX512F)) {
+                cache |= CACHE_AVX512F; /* OS must enable opmask+ZMM state */
+            }
         }
     }
     cpuid_cache = cache;
-- 
1.8.3.1



  parent reply	other threads:[~2020-02-13  7:53 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-13  7:52 [PATCH 0/2] Add AVX512F optimization option and buffer_zero_avx512() Robert Hoo
2020-02-13  7:52 ` [PATCH 1/2] configure: add configure option avx512f_opt Robert Hoo
2020-02-13  7:52 ` Robert Hoo [this message]
2020-02-13 10:30   ` [PATCH 2/2] util: add util function buffer_zero_avx512() Paolo Bonzini
2020-02-13 11:58     ` Robert Hoo
2020-02-13 18:20   ` Richard Henderson
2020-02-24  7:07     ` Robert Hoo
2020-02-24 16:13       ` Richard Henderson
2020-02-25  7:34         ` Robert Hoo
2020-02-25 15:29           ` Richard Henderson
2020-02-13  8:40 ` [PATCH 0/2] Add AVX512F optimization option and buffer_zero_avx512() no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1581580379-54109-3-git-send-email-robert.hu@linux.intel.com \
    --to=robert.hu@linux.intel.com \
    --cc=berrange@redhat.com \
    --cc=laurent@vivier.eu \
    --cc=pbonzini@redhat.com \
    --cc=philmd@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=robert.hu@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).