From: Aurelien Jarno <aurelien@aurel32.net>
To: qemu-devel@nongnu.org
Cc: Aurelien Jarno <aurelien@aurel32.net>
Subject: [Qemu-devel] [PATCH 08/14] target-mips: implement unaligned loads using TCG
Date: Tue, 9 Oct 2012 22:27:32 +0200 [thread overview]
Message-ID: <1349814458-21739-9-git-send-email-aurelien@aurel32.net> (raw)
In-Reply-To: <1349814458-21739-1-git-send-email-aurelien@aurel32.net>
Load/store from helpers should be avoided as they are quite
inefficient. Rewrite the unaligned load instructions using TCG and
aligned loads. The number of actual load operations needed to implement
an unaligned load instruction is reduced from up to 8 to 1.
Note: As shifting by 32 or 64 is undefined behaviour and can't be relied
upon, the code loads mask constants that are already shifted by one.
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
target-mips/helper.h | 4 --
target-mips/op_helper.c | 142 -----------------------------------------------
target-mips/translate.c | 79 +++++++++++++++++++++-----
3 files changed, 66 insertions(+), 159 deletions(-)
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 740178f..843c561 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -4,13 +4,9 @@ DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int)
DEF_HELPER_2(raise_exception, noreturn, env, i32)
#ifdef TARGET_MIPS64
-DEF_HELPER_4(ldl, tl, env, tl, tl, int)
-DEF_HELPER_4(ldr, tl, env, tl, tl, int)
DEF_HELPER_4(sdl, void, env, tl, tl, int)
DEF_HELPER_4(sdr, void, env, tl, tl, int)
#endif
-DEF_HELPER_4(lwl, tl, env, tl, tl, int)
-DEF_HELPER_4(lwr, tl, env, tl, tl, int)
DEF_HELPER_4(swl, void, env, tl, tl, int)
DEF_HELPER_4(swr, void, env, tl, tl, int)
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index d88ac24..6ce27c1 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -335,56 +335,6 @@ HELPER_ST_ATOMIC(scd, ld, sd, 0x7)
#define GET_OFFSET(addr, offset) (addr - (offset))
#endif
-target_ulong helper_lwl(CPUMIPSState *env, target_ulong arg1,
- target_ulong arg2, int mem_idx)
-{
- target_ulong tmp;
-
- tmp = do_lbu(env, arg2, mem_idx);
- arg1 = (arg1 & 0x00FFFFFF) | (tmp << 24);
-
- if (GET_LMASK(arg2) <= 2) {
- tmp = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
- arg1 = (arg1 & 0xFF00FFFF) | (tmp << 16);
- }
-
- if (GET_LMASK(arg2) <= 1) {
- tmp = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
- arg1 = (arg1 & 0xFFFF00FF) | (tmp << 8);
- }
-
- if (GET_LMASK(arg2) == 0) {
- tmp = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
- arg1 = (arg1 & 0xFFFFFF00) | tmp;
- }
- return (int32_t)arg1;
-}
-
-target_ulong helper_lwr(CPUMIPSState *env, target_ulong arg1,
- target_ulong arg2, int mem_idx)
-{
- target_ulong tmp;
-
- tmp = do_lbu(env, arg2, mem_idx);
- arg1 = (arg1 & 0xFFFFFF00) | tmp;
-
- if (GET_LMASK(arg2) >= 1) {
- tmp = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
- arg1 = (arg1 & 0xFFFF00FF) | (tmp << 8);
- }
-
- if (GET_LMASK(arg2) >= 2) {
- tmp = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
- arg1 = (arg1 & 0xFF00FFFF) | (tmp << 16);
- }
-
- if (GET_LMASK(arg2) == 3) {
- tmp = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
- arg1 = (arg1 & 0x00FFFFFF) | (tmp << 24);
- }
- return (int32_t)arg1;
-}
-
void helper_swl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
int mem_idx)
{
@@ -425,98 +375,6 @@ void helper_swr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
#define GET_LMASK64(v) (((v) & 7) ^ 7)
#endif
-target_ulong helper_ldl(CPUMIPSState *env, target_ulong arg1,
- target_ulong arg2, int mem_idx)
-{
- uint64_t tmp;
-
- tmp = do_lbu(env, arg2, mem_idx);
- arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
-
- if (GET_LMASK64(arg2) <= 6) {
- tmp = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
- arg1 = (arg1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
- }
-
- if (GET_LMASK64(arg2) <= 5) {
- tmp = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
- arg1 = (arg1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
- }
-
- if (GET_LMASK64(arg2) <= 4) {
- tmp = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
- arg1 = (arg1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
- }
-
- if (GET_LMASK64(arg2) <= 3) {
- tmp = do_lbu(env, GET_OFFSET(arg2, 4), mem_idx);
- arg1 = (arg1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
- }
-
- if (GET_LMASK64(arg2) <= 2) {
- tmp = do_lbu(env, GET_OFFSET(arg2, 5), mem_idx);
- arg1 = (arg1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
- }
-
- if (GET_LMASK64(arg2) <= 1) {
- tmp = do_lbu(env, GET_OFFSET(arg2, 6), mem_idx);
- arg1 = (arg1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
- }
-
- if (GET_LMASK64(arg2) == 0) {
- tmp = do_lbu(env, GET_OFFSET(arg2, 7), mem_idx);
- arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
- }
-
- return arg1;
-}
-
-target_ulong helper_ldr(CPUMIPSState *env, target_ulong arg1,
- target_ulong arg2, int mem_idx)
-{
- uint64_t tmp;
-
- tmp = do_lbu(env, arg2, mem_idx);
- arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
-
- if (GET_LMASK64(arg2) >= 1) {
- tmp = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
- arg1 = (arg1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
- }
-
- if (GET_LMASK64(arg2) >= 2) {
- tmp = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
- arg1 = (arg1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
- }
-
- if (GET_LMASK64(arg2) >= 3) {
- tmp = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
- arg1 = (arg1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
- }
-
- if (GET_LMASK64(arg2) >= 4) {
- tmp = do_lbu(env, GET_OFFSET(arg2, -4), mem_idx);
- arg1 = (arg1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
- }
-
- if (GET_LMASK64(arg2) >= 5) {
- tmp = do_lbu(env, GET_OFFSET(arg2, -5), mem_idx);
- arg1 = (arg1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
- }
-
- if (GET_LMASK64(arg2) >= 6) {
- tmp = do_lbu(env, GET_OFFSET(arg2, -6), mem_idx);
- arg1 = (arg1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
- }
-
- if (GET_LMASK64(arg2) == 7) {
- tmp = do_lbu(env, GET_OFFSET(arg2, -7), mem_idx);
- arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
- }
-
- return arg1;
-}
-
void helper_sdl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
int mem_idx)
{
diff --git a/target-mips/translate.c b/target-mips/translate.c
index f7d9467..8a7462b 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -1124,7 +1124,7 @@ static void gen_ld (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
int rt, int base, int16_t offset)
{
const char *opn = "ld";
- TCGv t0, t1;
+ TCGv t0, t1, t2;
if (rt == 0 && env->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F)) {
/* Loongson CPU uses a load to zero register for prefetch.
@@ -1158,21 +1158,47 @@ static void gen_ld (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
opn = "lld";
break;
case OPC_LDL:
- save_cpu_state(ctx, 1);
+ save_cpu_state(ctx, 0);
t1 = tcg_temp_new();
+ tcg_gen_andi_tl(t1, t0, 7);
+#ifndef TARGET_WORDS_BIGENDIAN
+ tcg_gen_xori_tl(t1, t1, 7);
+#endif
+ tcg_gen_shli_tl(t1, t1, 3);
+ tcg_gen_andi_tl(t0, t0, ~7);
+ tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
+ tcg_gen_shl_tl(t0, t0, t1);
+ tcg_gen_xori_tl(t1, t1, 63);
+ t2 = tcg_const_tl(0x7fffffffffffffffull);
+ tcg_gen_shr_tl(t2, t2, t1);
gen_load_gpr(t1, rt);
- gen_helper_1e2i(ldl, t1, t1, t0, ctx->mem_idx);
- gen_store_gpr(t1, rt);
+ tcg_gen_and_tl(t1, t1, t2);
+ tcg_temp_free(t2);
+ tcg_gen_or_tl(t0, t0, t1);
tcg_temp_free(t1);
+ gen_store_gpr(t0, rt);
opn = "ldl";
break;
case OPC_LDR:
- save_cpu_state(ctx, 1);
+ save_cpu_state(ctx, 0);
t1 = tcg_temp_new();
+ tcg_gen_andi_tl(t1, t0, 7);
+#ifdef TARGET_WORDS_BIGENDIAN
+ tcg_gen_xori_tl(t1, t1, 7);
+#endif
+ tcg_gen_shli_tl(t1, t1, 3);
+ tcg_gen_andi_tl(t0, t0, ~7);
+ tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
+ tcg_gen_shr_tl(t0, t0, t1);
+ tcg_gen_xori_tl(t1, t1, 63);
+ t2 = tcg_const_tl(0xfffffffffffffffeull);
+ tcg_gen_shl_tl(t2, t2, t1);
gen_load_gpr(t1, rt);
- gen_helper_1e2i(ldr, t1, t1, t0, ctx->mem_idx);
- gen_store_gpr(t1, rt);
+ tcg_gen_and_tl(t1, t1, t2);
+ tcg_temp_free(t2);
+ tcg_gen_or_tl(t0, t0, t1);
tcg_temp_free(t1);
+ gen_store_gpr(t0, rt);
opn = "ldr";
break;
case OPC_LDPC:
@@ -1225,21 +1251,48 @@ static void gen_ld (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
opn = "lbu";
break;
case OPC_LWL:
- save_cpu_state(ctx, 1);
+ save_cpu_state(ctx, 0);
t1 = tcg_temp_new();
+ tcg_gen_andi_tl(t1, t0, 3);
+#ifndef TARGET_WORDS_BIGENDIAN
+ tcg_gen_xori_tl(t1, t1, 3);
+#endif
+ tcg_gen_shli_tl(t1, t1, 3);
+ tcg_gen_andi_tl(t0, t0, ~3);
+ tcg_gen_qemu_ld32u(t0, t0, ctx->mem_idx);
+ tcg_gen_shl_tl(t0, t0, t1);
+ tcg_gen_xori_tl(t1, t1, 31);
+ t2 = tcg_const_tl(0x7fffffffull);
+ tcg_gen_shr_tl(t2, t2, t1);
gen_load_gpr(t1, rt);
- gen_helper_1e2i(lwl, t1, t1, t0, ctx->mem_idx);
- gen_store_gpr(t1, rt);
+ tcg_gen_and_tl(t1, t1, t2);
+ tcg_temp_free(t2);
+ tcg_gen_or_tl(t0, t0, t1);
tcg_temp_free(t1);
+ tcg_gen_ext32s_tl(t0, t0);
+ gen_store_gpr(t0, rt);
opn = "lwl";
break;
case OPC_LWR:
- save_cpu_state(ctx, 1);
+ save_cpu_state(ctx, 0);
t1 = tcg_temp_new();
+ tcg_gen_andi_tl(t1, t0, 3);
+#ifdef TARGET_WORDS_BIGENDIAN
+ tcg_gen_xori_tl(t1, t1, 3);
+#endif
+ tcg_gen_shli_tl(t1, t1, 3);
+ tcg_gen_andi_tl(t0, t0, ~3);
+ tcg_gen_qemu_ld32u(t0, t0, ctx->mem_idx);
+ tcg_gen_shr_tl(t0, t0, t1);
+ tcg_gen_xori_tl(t1, t1, 31);
+ t2 = tcg_const_tl(0xfffffffeull);
+ tcg_gen_shl_tl(t2, t2, t1);
gen_load_gpr(t1, rt);
- gen_helper_1e2i(lwr, t1, t1, t0, ctx->mem_idx);
- gen_store_gpr(t1, rt);
+ tcg_gen_and_tl(t1, t1, t2);
+ tcg_temp_free(t2);
+ tcg_gen_or_tl(t0, t0, t1);
tcg_temp_free(t1);
+ gen_store_gpr(t0, rt);
opn = "lwr";
break;
case OPC_LL:
--
1.7.10.4
next prev parent reply other threads:[~2012-10-09 20:27 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-10-09 20:27 [Qemu-devel] [PATCH 00/14] target-mips: misc fixes and optimizations Aurelien Jarno
2012-10-09 20:27 ` [Qemu-devel] [PATCH 01/14] softfloat: implement fused multiply-add NaN propagation for MIPS Aurelien Jarno
2012-10-09 20:27 ` [Qemu-devel] [PATCH 02/14] target-mips: use the softfloat floatXX_muladd functions Aurelien Jarno
2012-10-10 19:58 ` Richard Henderson
2012-10-09 20:27 ` [Qemu-devel] [PATCH 03/14] target-mips: fix FPU exceptions Aurelien Jarno
2012-10-10 20:05 ` Richard Henderson
2012-10-09 20:27 ` [Qemu-devel] [PATCH 04/14] target-mips: use softfloat constants when possible Aurelien Jarno
2012-10-10 20:09 ` Richard Henderson
2012-10-16 23:26 ` Aurelien Jarno
2012-10-09 20:27 ` [Qemu-devel] [PATCH 05/14] target-mips: cleanup load/store operations Aurelien Jarno
2012-10-10 20:10 ` Richard Henderson
2012-10-09 20:27 ` [Qemu-devel] [PATCH 06/14] target-mips: optimize load operations Aurelien Jarno
2012-10-10 20:11 ` Richard Henderson
2012-10-09 20:27 ` [Qemu-devel] [PATCH 07/14] target-mips: simplify load/store microMIPS helpers Aurelien Jarno
2012-10-10 20:15 ` Richard Henderson
2012-10-09 20:27 ` Aurelien Jarno [this message]
2012-10-10 20:28 ` [Qemu-devel] [PATCH 08/14] target-mips: implement unaligned loads using TCG Richard Henderson
2012-10-09 20:27 ` [Qemu-devel] [PATCH 09/14] target-mips: don't use local temps for store conditional Aurelien Jarno
2012-10-10 20:31 ` Richard Henderson
2012-10-09 20:27 ` [Qemu-devel] [PATCH 10/14] target-mips: implement movn/movz using movcond Aurelien Jarno
2012-10-10 20:33 ` Richard Henderson
2012-10-09 20:27 ` [Qemu-devel] [PATCH 11/14] target-mips: optimize ddiv/ddivu/div/divu with movcond Aurelien Jarno
2012-10-10 20:38 ` Richard Henderson
2012-10-09 20:27 ` [Qemu-devel] [PATCH 12/14] target-mips: use deposit instead of hardcoded version Aurelien Jarno
2012-10-10 20:43 ` Richard Henderson
2012-10-09 20:27 ` [Qemu-devel] [PATCH 13/14] target-mips: fix TLBR wrt SEGMask Aurelien Jarno
2012-10-10 20:44 ` Richard Henderson
2012-10-09 20:27 ` [Qemu-devel] [PATCH 14/14] target-mips: don't flush extra TLB on permissions upgrade Aurelien Jarno
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1349814458-21739-9-git-send-email-aurelien@aurel32.net \
--to=aurelien@aurel32.net \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).