From: Alexander Monakov <amonakov@ispras.ru>
To: qemu-devel@nongnu.org
Cc: Mikhail Romanov <mmromanov@ispras.ru>,
Richard Henderson <richard.henderson@linaro.org>,
Paolo Bonzini <pbonzini@redhat.com>,
Alexander Monakov <amonakov@ispras.ru>
Subject: [PATCH v3 2/6] util/bufferiszero: introduce an inline wrapper
Date: Tue, 6 Feb 2024 23:48:05 +0300 [thread overview]
Message-ID: <20240206204809.9859-3-amonakov@ispras.ru> (raw)
In-Reply-To: <20240206204809.9859-1-amonakov@ispras.ru>
Make buffer_is_zero a 'static inline' function that tests up to three
bytes from the buffer before handing off to an unrolled loop. This
eliminates call overhead for most non-zero buffers, and allows the
compiler to optimize out length checks when the length is known at
compile time (which is often the case in QEMU).
Signed-off-by: Alexander Monakov <amonakov@ispras.ru>
Signed-off-by: Mikhail Romanov <mmromanov@ispras.ru>
---
include/qemu/cutils.h | 28 +++++++++++++++-
util/bufferiszero.c | 76 ++++++++++++-------------------------------
2 files changed, 47 insertions(+), 57 deletions(-)
diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index 92c927a6a3..62b153e603 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -187,9 +187,35 @@ char *freq_to_str(uint64_t freq_hz);
/* used to print char* safely */
#define STR_OR_NULL(str) ((str) ? (str) : "null")
-bool buffer_is_zero(const void *buf, size_t len);
+bool buffer_is_zero_len_4_plus(const void *, size_t);
+extern bool (*buffer_is_zero_len_256_plus)(const void *, size_t);
bool test_buffer_is_zero_next_accel(void);
+/*
+ * Return true if all len bytes at vbuf are zero (also true when len == 0).
+ */
+static inline bool buffer_is_zero(const void *vbuf, size_t len)
+{
+ const char *buf = vbuf;
+
+ if (len == 0) {
+ return true;
+ }
+ if (buf[0] || buf[len - 1] || buf[len / 2]) {
+ return false;
+ }
+ /* Bytes 0, len / 2 and len - 1 cover the whole buffer if len <= 3. */
+ if (len <= 3) {
+ return true;
+ }
+
+ if (len >= 256) {
+ return buffer_is_zero_len_256_plus(vbuf, len);
+ } else {
+ return buffer_is_zero_len_4_plus(vbuf, len);
+ }
+}
+
/*
* Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
* Input is limited to 14-bit numbers
diff --git a/util/bufferiszero.c b/util/bufferiszero.c
index f5a3634f9a..01050694a6 100644
--- a/util/bufferiszero.c
+++ b/util/bufferiszero.c
@@ -26,8 +26,8 @@
#include "qemu/bswap.h"
#include "host/cpuinfo.h"
-static bool
-buffer_zero_int(const void *buf, size_t len)
+bool
+buffer_is_zero_len_4_plus(const void *buf, size_t len)
{
if (unlikely(len < 8)) {
/* For a very small buffer, simply accumulate all the bytes. */
@@ -157,57 +157,40 @@ buffer_zero_avx512(const void *buf, size_t len)
}
#endif /* CONFIG_AVX512F_OPT */
-/*
- * Make sure that these variables are appropriately initialized when
- * SSE2 is enabled on the compiler command-line, but the compiler is
- * too old to support CONFIG_AVX2_OPT.
- */
-#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
-# define INIT_USED 0
-# define INIT_LENGTH 0
-# define INIT_ACCEL buffer_zero_int
-#else
-# ifndef __SSE2__
-# error "ISA selection confusion"
-# endif
-# define INIT_USED CPUINFO_SSE2
-# define INIT_LENGTH 64
-# define INIT_ACCEL buffer_zero_sse2
-#endif
-
-static unsigned used_accel = INIT_USED;
-static unsigned length_to_accel = INIT_LENGTH;
-static bool (*buffer_accel)(const void *, size_t) = INIT_ACCEL;
-
static unsigned __attribute__((noinline))
select_accel_cpuinfo(unsigned info)
{
/* Array is sorted in order of algorithm preference. */
static const struct {
unsigned bit;
- unsigned len;
bool (*fn)(const void *, size_t);
} all[] = {
#ifdef CONFIG_AVX512F_OPT
- { CPUINFO_AVX512F, 256, buffer_zero_avx512 },
+ { CPUINFO_AVX512F, buffer_zero_avx512 },
#endif
#ifdef CONFIG_AVX2_OPT
- { CPUINFO_AVX2, 128, buffer_zero_avx2 },
+ { CPUINFO_AVX2, buffer_zero_avx2 },
#endif
- { CPUINFO_SSE2, 64, buffer_zero_sse2 },
- { CPUINFO_ALWAYS, 0, buffer_zero_int },
+ { CPUINFO_SSE2, buffer_zero_sse2 },
+ { CPUINFO_ALWAYS, buffer_is_zero_len_4_plus },
};
for (unsigned i = 0; i < ARRAY_SIZE(all); ++i) {
if (info & all[i].bit) {
- length_to_accel = all[i].len;
- buffer_accel = all[i].fn;
+ buffer_is_zero_len_256_plus = all[i].fn;
return all[i].bit;
}
}
return 0;
}
+static unsigned used_accel
+#if defined(__SSE2__)
+ = CPUINFO_SSE2;
+#else
+ = 0;
+#endif
+
#if defined(CONFIG_AVX512F_OPT) || defined(CONFIG_AVX2_OPT)
static void __attribute__((constructor)) init_accel(void)
{
@@ -227,35 +210,16 @@ bool test_buffer_is_zero_next_accel(void)
return used;
}
-static bool select_accel_fn(const void *buf, size_t len)
-{
- if (likely(len >= length_to_accel)) {
- return buffer_accel(buf, len);
- }
- return buffer_zero_int(buf, len);
-}
-
#else
-#define select_accel_fn buffer_zero_int
bool test_buffer_is_zero_next_accel(void)
{
return false;
}
#endif
-/*
- * Checks if a buffer is all zeroes
- */
-bool buffer_is_zero(const void *buf, size_t len)
-{
- if (unlikely(len == 0)) {
- return true;
- }
-
- /* Fetch the beginning of the buffer while we select the accelerator. */
- __builtin_prefetch(buf);
-
- /* Use an optimized zero check if possible. Note that this also
- includes a check for an unrolled loop over 64-bit integers. */
- return select_accel_fn(buf, len);
-}
+bool (*buffer_is_zero_len_256_plus)(const void *, size_t)
+#if defined(__SSE2__)
+ = buffer_zero_sse2;
+#else
+ = buffer_is_zero_len_4_plus;
+#endif
--
2.32.0
next prev parent reply other threads:[~2024-02-06 20:50 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-06 20:48 [PATCH v3 0/6] Optimize buffer_is_zero Alexander Monakov
2024-02-06 20:48 ` [PATCH v3 1/6] util/bufferiszero: remove SSE4.1 variant Alexander Monakov
2024-02-06 22:24 ` Richard Henderson
2024-02-06 20:48 ` Alexander Monakov [this message]
2024-02-06 22:44 ` [PATCH v3 2/6] util/bufferiszero: introduce an inline wrapper Richard Henderson
2024-02-07 7:13 ` Alexander Monakov
2024-02-08 20:07 ` Richard Henderson
2024-02-06 20:48 ` [PATCH v3 3/6] util/bufferiszero: remove AVX512 variant Alexander Monakov
2024-02-06 22:28 ` Richard Henderson
2024-02-06 23:56 ` Elena Ufimtseva
2024-02-07 6:29 ` Alexander Monakov
2024-02-07 10:38 ` Joao Martins
2024-02-06 20:48 ` [PATCH v3 4/6] util/bufferiszero: remove useless prefetches Alexander Monakov
2024-02-06 22:29 ` Richard Henderson
2024-02-06 20:48 ` [PATCH v3 5/6] util/bufferiszero: optimize SSE2 and AVX2 variants Alexander Monakov
2024-02-06 23:10 ` Richard Henderson
2024-02-06 20:48 ` [PATCH v3 6/6] util/bufferiszero: improve scalar variant Alexander Monakov
2024-02-06 22:34 ` Richard Henderson
2024-02-06 22:46 ` Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240206204809.9859-3-amonakov@ispras.ru \
--to=amonakov@ispras.ru \
--cc=mmromanov@ispras.ru \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).