From: Filip Navara <filip.navara@gmail.com>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] [PATCH 08/18] Convert NEON VZIP/VUZP/VTRN helper functions to pure TCG
Date: Sun, 19 Jul 2009 15:19:54 -0000 [thread overview]
Message-ID: <E1MSYB6-0001Hf-Jb@lists.gnu.org> (raw)
The neon_trn_u8, neon_trn_u16, neon_unzip_u8, neon_zip_u8 and neon_zip_u16
helpers used fixed registers to return values. This patch replaces that with
TCG code, so T0/T1 is no longer directly used by the helper functions.
Bugs in the gen_neon_unzip register load code were also fixed.
Signed-off-by: Filip Navara <filip.navara@gmail.com>
---
target-arm/helpers.h | 6 --
target-arm/op_helper.c | 58 ------------------
target-arm/translate.c | 153 ++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 143 insertions(+), 74 deletions(-)
diff --git a/target-arm/helpers.h b/target-arm/helpers.h
index 01175e1..f298eff 100644
--- a/target-arm/helpers.h
+++ b/target-arm/helpers.h
@@ -338,12 +338,6 @@ DEF_HELPER_2(neon_qneg_s8, i32, env, i32)
DEF_HELPER_2(neon_qneg_s16, i32, env, i32)
DEF_HELPER_2(neon_qneg_s32, i32, env, i32)
-DEF_HELPER_0(neon_trn_u8, void)
-DEF_HELPER_0(neon_trn_u16, void)
-DEF_HELPER_0(neon_unzip_u8, void)
-DEF_HELPER_0(neon_zip_u8, void)
-DEF_HELPER_0(neon_zip_u16, void)
-
DEF_HELPER_2(neon_min_f32, i32, i32, i32)
DEF_HELPER_2(neon_max_f32, i32, i32, i32)
DEF_HELPER_2(neon_abd_f32, i32, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index d4ae4ae..5ac631d 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -495,61 +495,3 @@ uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2)
}
return res;
}
-
-/* These need to return a pair of value, so still use T0/T1. */
-/* Transpose. Argument order is rather strange to avoid special casing
- the tranlation code.
- On input T0 = rm, T1 = rd. On output T0 = rd, T1 = rm */
-void HELPER(neon_trn_u8)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = ((T0 & 0x00ff00ff) << 8) | (T1 & 0x00ff00ff);
- rm = ((T1 & 0xff00ff00) >> 8) | (T0 & 0xff00ff00);
- T0 = rd;
- T1 = rm;
-}
-
-void HELPER(neon_trn_u16)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = (T0 << 16) | (T1 & 0xffff);
- rm = (T1 >> 16) | (T0 & 0xffff0000);
- T0 = rd;
- T1 = rm;
-}
-
-/* Worker routines for zip and unzip. */
-void HELPER(neon_unzip_u8)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = (T0 & 0xff) | ((T0 >> 8) & 0xff00)
- | ((T1 << 16) & 0xff0000) | ((T1 << 8) & 0xff000000);
- rm = ((T0 >> 8) & 0xff) | ((T0 >> 16) & 0xff00)
- | ((T1 << 8) & 0xff0000) | (T1 & 0xff000000);
- T0 = rd;
- T1 = rm;
-}
-
-void HELPER(neon_zip_u8)(void)
-{
- uint32_t rd;
- uint32_t rm;
- rd = (T0 & 0xff) | ((T1 << 8) & 0xff00)
- | ((T0 << 16) & 0xff0000) | ((T1 << 24) & 0xff000000);
- rm = ((T0 >> 16) & 0xff) | ((T1 >> 8) & 0xff00)
- | ((T0 >> 8) & 0xff0000) | (T1 & 0xff000000);
- T0 = rd;
- T1 = rm;
-}
-
-void HELPER(neon_zip_u16)(void)
-{
- uint32_t tmp;
-
- tmp = (T0 & 0xffff) | (T1 << 16);
- T1 = (T1 & 0xffff0000) | (T0 >> 16);
- T0 = tmp;
-}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 87a6259..52b417d 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -3630,24 +3630,157 @@ static inline void gen_neon_get_scalar(int size, int reg)
}
}
+static void gen_neon_unzip_u8(TCGv t0, TCGv t1)
+{
+ TCGv rd, rm, tmp;
+
+ rd = new_tmp();
+ rm = new_tmp();
+ tmp = new_tmp();
+
+ tcg_gen_andi_i32(rd, t0, 0xff);
+ tcg_gen_shri_i32(tmp, t0, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff00);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shli_i32(tmp, t1, 16);
+ tcg_gen_andi_i32(tmp, tmp, 0xff0000);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shli_i32(tmp, t1, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff000000);
+ tcg_gen_or_i32(rd, rd, tmp);
+
+ tcg_gen_shri_i32(rm, t0, 8);
+ tcg_gen_andi_i32(rm, rm, 0xff);
+ tcg_gen_shri_i32(tmp, t0, 16);
+ tcg_gen_andi_i32(tmp, tmp, 0xff00);
+ tcg_gen_or_i32(rm, rm, tmp);
+ tcg_gen_shli_i32(tmp, t1, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff0000);
+ tcg_gen_or_i32(rm, rm, tmp);
+ tcg_gen_andi_i32(tmp, t1, 0xff000000);
+ tcg_gen_or_i32(t1, rm, tmp);
+ tcg_gen_mov_i32(t0, rd);
+
+ dead_tmp(tmp);
+ dead_tmp(rm);
+ dead_tmp(rd);
+}
+
+static void gen_neon_zip_u8(TCGv t0, TCGv t1)
+{
+ TCGv rd, rm, tmp;
+
+ rd = new_tmp();
+ rm = new_tmp();
+ tmp = new_tmp();
+
+ tcg_gen_andi_i32(rd, t0, 0xff);
+ tcg_gen_shli_i32(tmp, t1, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff00);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shli_i32(tmp, t0, 16);
+ tcg_gen_andi_i32(tmp, tmp, 0xff0000);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shli_i32(tmp, t1, 24);
+ tcg_gen_andi_i32(tmp, tmp, 0xff000000);
+ tcg_gen_or_i32(rd, rd, tmp);
+
+ tcg_gen_andi_i32(rm, t1, 0xff000000);
+ tcg_gen_shri_i32(tmp, t0, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff0000);
+ tcg_gen_or_i32(rm, rm, tmp);
+ tcg_gen_shri_i32(tmp, t1, 8);
+ tcg_gen_andi_i32(tmp, tmp, 0xff00);
+ tcg_gen_or_i32(rm, rm, tmp);
+ tcg_gen_shri_i32(tmp, t0, 16);
+ tcg_gen_andi_i32(tmp, tmp, 0xff);
+ tcg_gen_or_i32(t1, rm, tmp);
+ tcg_gen_mov_i32(t0, rd);
+
+ dead_tmp(tmp);
+ dead_tmp(rm);
+ dead_tmp(rd);
+}
+
+static void gen_neon_zip_u16(TCGv t0, TCGv t1)
+{
+ TCGv tmp, tmp2;
+
+ tmp = new_tmp();
+ tmp2 = new_tmp();
+
+ tcg_gen_andi_i32(tmp, t0, 0xffff);
+ tcg_gen_shli_i32(tmp2, t1, 16);
+ tcg_gen_or_i32(tmp, tmp, tmp2);
+ tcg_gen_andi_i32(t1, t1, 0xffff0000);
+ tcg_gen_shri_i32(tmp2, t0, 16);
+ tcg_gen_or_i32(t1, t1, tmp2);
+ tcg_gen_mov_i32(t0, tmp);
+
+ dead_tmp(tmp2);
+ dead_tmp(tmp);
+}
+
static void gen_neon_unzip(int reg, int q, int tmp, int size)
{
int n;
-
+
for (n = 0; n < q + 1; n += 2) {
NEON_GET_REG(T0, reg, n);
- NEON_GET_REG(T0, reg, n + n);
+ NEON_GET_REG(T1, reg, n + 1);
switch (size) {
- case 0: gen_helper_neon_unzip_u8(); break;
- case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same. */
+ case 0: gen_neon_unzip_u8(cpu_T[0], cpu_T[1]); break;
+ case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break; /* zip and unzip are the same. */
case 2: /* no-op */; break;
default: abort();
}
- gen_neon_movl_scratch_T0(tmp + n);
- gen_neon_movl_scratch_T1(tmp + n + 1);
+ gen_neon_movl_T0_scratch(tmp + n);
+ gen_neon_movl_T1_scratch(tmp + n + 1);
}
}
+static void gen_neon_trn_u8(TCGv t0, TCGv t1)
+{
+ TCGv rd, tmp;
+
+ rd = new_tmp();
+ tmp = new_tmp();
+
+ tcg_gen_shli_i32(rd, t0, 8);
+ tcg_gen_andi_i32(rd, rd, 0xff00ff00);
+ tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
+ tcg_gen_or_i32(rd, rd, tmp);
+
+ tcg_gen_shri_i32(t1, t1, 8);
+ tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
+ tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
+ tcg_gen_or_i32(t1, t1, tmp);
+ tcg_gen_mov_i32(t0, rd);
+
+ dead_tmp(tmp);
+ dead_tmp(rd);
+}
+
+static void gen_neon_trn_u16(TCGv t0, TCGv t1)
+{
+ TCGv rd, tmp;
+
+ rd = new_tmp();
+ tmp = new_tmp();
+
+ tcg_gen_shli_i32(rd, t0, 16);
+ tcg_gen_andi_i32(tmp, t1, 0xffff);
+ tcg_gen_or_i32(rd, rd, tmp);
+ tcg_gen_shri_i32(t1, t1, 16);
+ tcg_gen_andi_i32(tmp, t0, 0xffff0000);
+ tcg_gen_or_i32(t1, t1, tmp);
+ tcg_gen_mov_i32(t0, rd);
+
+ dead_tmp(tmp);
+ dead_tmp(rd);
+}
+
+
static struct {
int nregs;
int interleave;
@@ -5259,8 +5392,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
NEON_GET_REG(T0, rd, n);
NEON_GET_REG(T1, rd, n);
switch (size) {
- case 0: gen_helper_neon_zip_u8(); break;
- case 1: gen_helper_neon_zip_u16(); break;
+ case 0: gen_neon_zip_u8(cpu_T[0], cpu_T[1]); break;
+ case 1: gen_neon_zip_u16(cpu_T[0], cpu_T[1]); break;
case 2: /* no-op */; break;
default: abort();
}
@@ -5445,8 +5578,8 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
case 33: /* VTRN */
NEON_GET_REG(T1, rd, pass);
switch (size) {
- case 0: gen_helper_neon_trn_u8(); break;
- case 1: gen_helper_neon_trn_u16(); break;
+ case 0: gen_neon_trn_u8(cpu_T[0], cpu_T[1]); break;
+ case 1: gen_neon_trn_u16(cpu_T[0], cpu_T[1]); break;
case 2: abort();
default: return 1;
}
--
1.6.3.2.1299.gee46c
reply other threads:[~2009-07-19 15:19 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=E1MSYB6-0001Hf-Jb@lists.gnu.org \
--to=filip.navara@gmail.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).