From: Filip Navara <filip.navara@gmail.com>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 08/18] Convert NEON VZIP/VUZP/VTRN helper functions to pure TCG
Date: Sun, 19 Jul 2009 15:19:54 -0000 [thread overview]
Message-ID: <E1MSYB6-0001Hf-Jb@lists.gnu.org> (raw)
The neon_trn_u8, neon_trn_u16, neon_unzip_u8, neon_zip_u8 and neon_zip_u16
helpers used fixed registers to return values. This patch replaces that with
TCG code, so T0/T1 is no longer directly used by the helper functions.
Bugs in the gen_neon_unzip register load code were also fixed.
Signed-off-by: Filip Navara <filip.navara@gmail.com>
---
target-arm/helpers.h | 6 --
target-arm/op_helper.c | 58 ------------------
target-arm/translate.c | 153 ++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 143 insertions(+), 74 deletions(-)
diff --git a/target-arm/helpers.h b/target-arm/helpers.h
index 01175e1..f298eff 100644
--- a/target-arm/helpers.h
+++ b/target-arm/helpers.h
@@ -338,12 +338,6 @@ DEF_HELPER_2(neon_qneg_s8, i32, env, i32)
DEF_HELPER_2(neon_qneg_s16, i32, env, i32)
DEF_HELPER_2(neon_qneg_s32, i32, env, i32)
-DEF_HELPER_0(neon_trn_u8, void)
-DEF_HELPER_0(neon_trn_u16, void)
-DEF_HELPER_0(neon_unzip_u8, void)
-DEF_HELPER_0(neon_zip_u8, void)
-DEF_HELPER_0(neon_zip_u16, void)
-
DEF_HELPER_2(neon_min_f32, i32, i32, i32)
DEF_HELPER_2(neon_max_f32, i32, i32, i32)
DEF_HELPER_2(neon_abd_f32, i32, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index d4ae4ae..5ac631d 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -495,61 +495,3 @@ uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2)
}
return res;
}
-
-/* These need to return a pair of value, so still use T0/T1. */
-/* Transpose. Argument order is rather strange to avoid special casing
- the tranlation code.
- On input T0 = rm, T1 = rd. On output T0 = rd, T1 = rm */
-void HELPER(neon_trn_u8)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff);
- rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00);
- T0 = rd;
- T1 = rm;
-}
-
-void HELPER(neon_trn_u16)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = (T0 << 16) | (T1 & 0xffff);
- rm = (T1 >> 16) | (T0 & 0xffff0000);
- T0 = rd;
- T1 = rm;
-}
-
-/* Worker routines for zip and unzip. */
-void HELPER(neon_unzip_u8)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00)
- | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000);
- rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00)
- | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000);
- T0 = rd;
- T1 = rm;
-}
-
-void HELPER(neon_zip_u8)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = (T0 & 0xff) | ((T1 << 8) & 0xff00)
- | ((T0 << 16) & 0xff0000) | ((T1 << 24) & 0xff000000);
- rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00)
- | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000);
- T0 = rd;
- T1 = rm;
-}
-
-void HELPER(neon_zip_u16)(void)
-{
- uint32_t tmp;
-
- tmp = (T0 & 0xffff) | (T1 << 16);
- T1 = (T1 & 0xffff0000) | (T0 >> 16);
- T0 = tmp;
-}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 87a6259..52b417d 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -3630,24 +3630,157 @@ static inline void gen_neon_get_scalar(int size, int reg)
}
}
+static void gen_neon_unzip_u8(TCGv t0, TCGv t1)
+{
+ TCGv rd, rm, tmp;
+
+ rd = new_tmp();
+ rm = new_tmp();
+ tmp = new_tmp();
+
+ tcg_gen_andi_i32(rd, t0, 0xff);
+ tcg_gen_shri_i32(tmp, t0, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff00);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shli_i32(tmp, t1, 16);
+ tcg_gen_andi_i32(tmp, tmp, 0xff0000);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shli_i32(tmp, t1, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff000000);
+ tcg_gen_or_i32(rd, rd, tmp);
+
+ tcg_gen_shri_i32(rm, t0, 8);
+ tcg_gen_andi_i32(rm, rm, 0xff);
+ tcg_gen_shri_i32(tmp, t0, 16);
+ tcg_gen_andi_i32(tmp, tmp, 0xff00);
+ tcg_gen_or_i32(rm, rm, tmp);
+ tcg_gen_shli_i32(tmp, t1, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff0000);
+ tcg_gen_or_i32(rm, rm, tmp);
+ tcg_gen_andi_i32(tmp, t1, 0xff000000);
+ tcg_gen_or_i32(t1, rm, tmp);
+ tcg_gen_mov_i32(t0, rd);
+
+ dead_tmp(tmp);
+ dead_tmp(rm);
+ dead_tmp(rd);
+}
+
+static void gen_neon_zip_u8(TCGv t0, TCGv t1)
+{
+ TCGv rd, rm, tmp;
+
+ rd = new_tmp();
+ rm = new_tmp();
+ tmp = new_tmp();
+
+ tcg_gen_andi_i32(rd, t0, 0xff);
+ tcg_gen_shli_i32(tmp, t1, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff00);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shli_i32(tmp, t0, 16);
+ tcg_gen_andi_i32(tmp, tmp, 0xff0000);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shli_i32(tmp, t1, 24);
+ tcg_gen_andi_i32(tmp, tmp, 0xff000000);
+ tcg_gen_or_i32(rd, rd, tmp);
+
+ tcg_gen_andi_i32(rm, t1, 0xff000000);
+ tcg_gen_shri_i32(tmp, t0, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff0000);
+ tcg_gen_or_i32(rm, rm, tmp);
+ tcg_gen_shri_i32(tmp, t1, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff00);
+ tcg_gen_or_i32(rm, rm, tmp);
+ tcg_gen_shri_i32(tmp, t0, 16);
+ tcg_gen_andi_i32(tmp, tmp, 0xff);
+ tcg_gen_or_i32(t1, rm, tmp);
+ tcg_gen_mov_i32(t0, rd);
+
+ dead_tmp(tmp);
+ dead_tmp(rm);
+ dead_tmp(rd);
+}
+
+static void gen_neon_zip_u16(TCGv t0, TCGv t1)
+{
+ TCGv tmp, tmp2;
+
+ tmp = new_tmp();
+ tmp2 = new_tmp();
+
+ tcg_gen_andi_i32(tmp, t0, 0xffff);
+ tcg_gen_shli_i32(tmp2, t1, 16);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
+ tcg_gen_andi_i32(t1, t1, 0xffff0000);
+ tcg_gen_shri_i32(tmp2, t0, 16);
+ tcg_gen_or_i32(t1, t1, tmp2);
+ tcg_gen_mov_i32(t0, tmp);
+
+ dead_tmp(tmp2);
+ dead_tmp(tmp);
+}
+
static void gen_neon_unzip(int reg, int q, int tmp, int size)
{
int n;
-
+
for (n = 0; n < q + 1; n += 2) {
NEON_GET_REG(T0, reg, n);
- NEON_GET_REG(T0, reg, n + n);
+ NEON_GET_REG(T1, reg, n + 1);
switch (size) {
- case 0: gen_helper_neon_unzip_u8(); break;
- case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same. */
+ case 0: gen_neon_unzip_u8(cpu_T[0], cpu_T[1]); break;
+ case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break; /* zip and unzip are the same. */
case 2: /* no-op */; break;
default: abort();
}
- gen_neon_movl_scratch_T0(tmp + n);
- gen_neon_movl_scratch_T1(tmp + n + 1);
+ gen_neon_movl_T0_scratch(tmp + n);
+ gen_neon_movl_T1_scratch(tmp + n + 1);
}
}
+static void gen_neon_trn_u8(TCGv t0, TCGv t1)
+{
+ TCGv rd, tmp;
+
+ rd = new_tmp();
+ tmp = new_tmp();
+
+ tcg_gen_shli_i32(rd, t0, 8);
+ tcg_gen_andi_i32(rd, rd, 0xff00ff00);
+ tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
+ tcg_gen_or_i32(rd, rd, tmp);
+
+ tcg_gen_shri_i32(t1, t1, 8);
+ tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
+ tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
+ tcg_gen_or_i32(t1, t1, tmp);
+ tcg_gen_mov_i32(t0, rd);
+
+ dead_tmp(tmp);
+ dead_tmp(rd);
+}
+
+static void gen_neon_trn_u16(TCGv t0, TCGv t1)
+{
+ TCGv rd, tmp;
+
+ rd = new_tmp();
+ tmp = new_tmp();
+
+ tcg_gen_shli_i32(rd, t0, 16);
+ tcg_gen_andi_i32(tmp, t1, 0xffff);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shri_i32(t1, t1, 16);
+ tcg_gen_andi_i32(tmp, t0, 0xffff0000);
+ tcg_gen_or_i32(t1, t1, tmp);
+ tcg_gen_mov_i32(t0, rd);
+
+ dead_tmp(tmp);
+ dead_tmp(rd);
+}
+
+
static struct {
int nregs;
int interleave;
@@ -5259,8 +5392,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
NEON_GET_REG(T0, rd, n);
NEON_GET_REG(T1, rd, n);
switch (size) {
- case 0: gen_helper_neon_zip_u8(); break;
- case 1: gen_helper_neon_zip_u16(); break;
+ case 0: gen_neon_zip_u8(cpu_T[0], cpu_T[1]); break;
+ case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break;
case 2: /* no-op */; break;
default: abort();
}
@@ -5445,8 +5578,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
case 33: /* VTRN */
NEON_GET_REG(T1, rd, pass);
switch (size) {
- case 0: gen_helper_neon_trn_u8(); break;
- case 1: gen_helper_neon_trn_u16(); break;
+ case 0: gen_neon_trn_u8(cpu_T[0], cpu_T[1]); break;
+ case 1: gen_neon_trn_u16(cpu_T[0], cpu_T[1]); break;
case 2: abort();
default: return 1;
}
--
1.6.3.2.1299.gee46c
reply other threads:[~2009-07-19 15:19 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=E1MSYB6-0001Hf-Jb@lists.gnu.org \
--to=filip.navara@gmail.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.