From: Paolo Bonzini <pbonzini@redhat.com>
To: qemu-devel@nongnu.org
Cc: richard.henderson@linaro.org, paul@nowt.org
Subject: [PATCH 12/23] i386: Rewrite vector shift helper
Date: Sat, 27 Aug 2022 01:11:53 +0200 [thread overview]
Message-ID: <20220826231204.201395-13-pbonzini@redhat.com> (raw)
In-Reply-To: <20220826231204.201395-1-pbonzini@redhat.com>
From: Paul Brook <paul@nowt.org>
Rewrite the vector shift helpers in preparation for AVX support (3-operand
form and 256-bit vectors).
For now, keep the existing two-operand interface.
No functional changes to existing helpers.
Signed-off-by: Paul Brook <paul@nowt.org>
Message-Id: <20220424220204.2493824-11-paul@nowt.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/ops_sse.h | 221 ++++++++++++++++++------------------------
1 file changed, 96 insertions(+), 125 deletions(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index f603981ab8..8c745f5cab 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -56,195 +56,166 @@
#define MOVE(d, r) memcpy(&(d).B(0), &(r).B(0), SIZE)
#endif
-void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
-{
- int shift;
+#if SHIFT == 0
+#define FPSRL(x, c) ((x) >> shift)
+#define FPSRAW(x, c) ((int16_t)(x) >> shift)
+#define FPSRAL(x, c) ((int32_t)(x) >> shift)
+#define FPSLL(x, c) ((x) << shift)
+#endif
- if (s->Q(0) > 15) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
+{
+ Reg *s = d;
+ int shift;
+ if (c->Q(0) > 15) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->W(0) >>= shift;
- d->W(1) >>= shift;
- d->W(2) >>= shift;
- d->W(3) >>= shift;
-#if SHIFT == 1
- d->W(4) >>= shift;
- d->W(5) >>= shift;
- d->W(6) >>= shift;
- d->W(7) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSRL(s->W(i), shift);
+ }
}
}
-void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
+ if (c->Q(0) > 15) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
+ } else {
+ shift = c->B(0);
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSLL(s->W(i), shift);
+ }
+ }
+}
- if (s->Q(0) > 15) {
+void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
+{
+ Reg *s = d;
+ int shift;
+ if (c->Q(0) > 15) {
shift = 15;
} else {
- shift = s->B(0);
+ shift = c->B(0);
+ }
+ for (int i = 0; i < 4 << SHIFT; i++) {
+ d->W(i) = FPSRAW(s->W(i), shift);
}
- d->W(0) = (int16_t)d->W(0) >> shift;
- d->W(1) = (int16_t)d->W(1) >> shift;
- d->W(2) = (int16_t)d->W(2) >> shift;
- d->W(3) = (int16_t)d->W(3) >> shift;
-#if SHIFT == 1
- d->W(4) = (int16_t)d->W(4) >> shift;
- d->W(5) = (int16_t)d->W(5) >> shift;
- d->W(6) = (int16_t)d->W(6) >> shift;
- d->W(7) = (int16_t)d->W(7) >> shift;
-#endif
}
-void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 15) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 31) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->W(0) <<= shift;
- d->W(1) <<= shift;
- d->W(2) <<= shift;
- d->W(3) <<= shift;
-#if SHIFT == 1
- d->W(4) <<= shift;
- d->W(5) <<= shift;
- d->W(6) <<= shift;
- d->W(7) <<= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSRL(s->L(i), shift);
+ }
}
}
-void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 31) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->L(0) >>= shift;
- d->L(1) >>= shift;
-#if SHIFT == 1
- d->L(2) >>= shift;
- d->L(3) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSLL(s->L(i), shift);
+ }
}
}
-void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
+ if (c->Q(0) > 31) {
shift = 31;
} else {
- shift = s->B(0);
+ shift = c->B(0);
+ }
+ for (int i = 0; i < 2 << SHIFT; i++) {
+ d->L(i) = FPSRAL(s->L(i), shift);
}
- d->L(0) = (int32_t)d->L(0) >> shift;
- d->L(1) = (int32_t)d->L(1) >> shift;
-#if SHIFT == 1
- d->L(2) = (int32_t)d->L(2) >> shift;
- d->L(3) = (int32_t)d->L(3) >> shift;
-#endif
}
-void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 31) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 63) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->L(0) <<= shift;
- d->L(1) <<= shift;
-#if SHIFT == 1
- d->L(2) <<= shift;
- d->L(3) <<= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = FPSRL(s->Q(i), shift);
+ }
}
}
-void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift;
-
- if (s->Q(0) > 63) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
+ if (c->Q(0) > 63) {
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = 0;
+ }
} else {
- shift = s->B(0);
- d->Q(0) >>= shift;
-#if SHIFT == 1
- d->Q(1) >>= shift;
-#endif
+ shift = c->B(0);
+ for (int i = 0; i < 1 << SHIFT; i++) {
+ d->Q(i) = FPSLL(s->Q(i), shift);
+ }
}
}
-void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
-{
- int shift;
-
- if (s->Q(0) > 63) {
- d->Q(0) = 0;
-#if SHIFT == 1
- d->Q(1) = 0;
-#endif
- } else {
- shift = s->B(0);
- d->Q(0) <<= shift;
-#if SHIFT == 1
- d->Q(1) <<= shift;
-#endif
- }
-}
-
-#if SHIFT == 1
-void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+#if SHIFT >= 1
+void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift, i;
- shift = s->L(0);
+ shift = c->L(0);
if (shift > 16) {
shift = 16;
}
for (i = 0; i < 16 - shift; i++) {
- d->B(i) = d->B(i + shift);
+ d->B(i) = s->B(i + shift);
}
for (i = 16 - shift; i < 16; i++) {
d->B(i) = 0;
}
}
-void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *c)
{
+ Reg *s = d;
int shift, i;
- shift = s->L(0);
+ shift = c->L(0);
if (shift > 16) {
shift = 16;
}
for (i = 15; i >= shift; i--) {
- d->B(i) = d->B(i - shift);
+ d->B(i) = s->B(i - shift);
}
for (i = 0; i < shift; i++) {
d->B(i) = 0;
--
2.37.1
next prev parent reply other threads:[~2022-08-26 23:24 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-08-26 23:11 [PATCH v2 00/23] target/i386: make most SSE helpers generic in the vector size Paolo Bonzini
2022-08-26 23:11 ` [PATCH 01/23] i386: do not use MOVL to move data between SSE registers Paolo Bonzini
2022-08-26 23:17 ` Richard Henderson
2022-08-26 23:11 ` [PATCH 02/23] i386: formatting fixes Paolo Bonzini
2022-08-26 23:17 ` Richard Henderson
2022-08-26 23:11 ` [PATCH 03/23] i386: Add ZMM_OFFSET macro Paolo Bonzini
2022-08-26 23:11 ` [PATCH 04/23] i386: Rework sse_op_table1 Paolo Bonzini
2022-08-26 23:11 ` [PATCH 05/23] i386: Rework sse_op_table6/7 Paolo Bonzini
2022-08-26 23:11 ` [PATCH 06/23] i386: Move 3DNOW decoder Paolo Bonzini
2022-08-26 23:11 ` [PATCH 07/23] i386: check SSE table flags instead of hardcoding opcodes Paolo Bonzini
2022-08-26 23:23 ` Richard Henderson
2022-08-26 23:11 ` [PATCH 08/23] i386: isolate MMX code more Paolo Bonzini
2022-08-26 23:28 ` Richard Henderson
2022-08-26 23:11 ` [PATCH 09/23] i386: Add size suffix to vector FP helpers Paolo Bonzini
2022-08-26 23:11 ` [PATCH 10/23] i386: do not cast gen_helper_* function pointers Paolo Bonzini
2022-08-26 23:32 ` Richard Henderson
2022-08-26 23:11 ` [PATCH 11/23] i386: Add CHECK_NO_VEX Paolo Bonzini
2022-08-26 23:11 ` Paolo Bonzini [this message]
2022-08-26 23:11 ` [PATCH 13/23] i386: Rewrite simple integer vector helpers Paolo Bonzini
2022-08-26 23:11 ` [PATCH 14/23] i386: Misc integer AVX helper prep Paolo Bonzini
2022-08-26 23:11 ` [PATCH 15/23] i386: Destructive vector helpers for AVX Paolo Bonzini
2022-08-26 23:45 ` Richard Henderson
2022-08-27 6:22 ` Paolo Bonzini
2022-08-26 23:11 ` [PATCH 16/23] i386: Floating point arithmetic helper AVX prep Paolo Bonzini
2022-08-26 23:11 ` [PATCH 17/23] i386: reimplement AVX comparison helpers Paolo Bonzini
2022-08-26 23:11 ` [PATCH 18/23] i386: Dot product AVX helper prep Paolo Bonzini
2022-08-26 23:12 ` [PATCH 19/23] i386: Destructive FP helpers for AVX Paolo Bonzini
2022-08-26 23:12 ` [PATCH 20/23] i386: Misc AVX helper prep Paolo Bonzini
2022-08-26 23:12 ` [PATCH 21/23] i386: Rewrite blendv helpers Paolo Bonzini
2022-08-26 23:12 ` [PATCH 22/23] i386: AVX pclmulqdq prep Paolo Bonzini
2022-08-26 23:12 ` [PATCH 23/23] i386: AVX+AES helpers prep Paolo Bonzini
2022-08-26 23:50 ` Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220826231204.201395-13-pbonzini@redhat.com \
--to=pbonzini@redhat.com \
--cc=paul@nowt.org \
--cc=qemu-devel@nongnu.org \
--cc=richard.henderson@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).