* [Qemu-devel] Wanted: A better way to implement MIPS unaligned instructions
@ 2008-06-17 12:12 Thiemo Seufer
2008-06-17 21:56 ` Fabrice Bellard
0 siblings, 1 reply; 2+ messages in thread
From: Thiemo Seufer @ 2008-06-17 12:12 UTC (permalink / raw)
To: qemu-devel
Hello All,
I am currently trying to implement TCG versions of the MIPS [ls][dw][lr]
set of instructions. I believe I can't use a helper function for
load/store type instructions. The appended patch uses TCG directly, but
the resulting translation is excessively complicated. Is there a better
way to do this?
Thiemo
Index: qemu-work/target-mips/op.c
===================================================================
--- qemu-work.orig/target-mips/op.c 2008-06-16 07:32:12.000000000 +0100
+++ qemu-work/target-mips/op.c 2008-06-17 06:43:36.000000000 +0100
@@ -30,41 +30,6 @@
#ifndef CALL_FROM_TB1
#define CALL_FROM_TB1(func, arg0) func(arg0)
#endif
-#ifndef CALL_FROM_TB1_CONST16
-#define CALL_FROM_TB1_CONST16(func, arg0) CALL_FROM_TB1(func, arg0)
-#endif
-#ifndef CALL_FROM_TB2
-#define CALL_FROM_TB2(func, arg0, arg1) func(arg0, arg1)
-#endif
-#ifndef CALL_FROM_TB2_CONST16
-#define CALL_FROM_TB2_CONST16(func, arg0, arg1) \
- CALL_FROM_TB2(func, arg0, arg1)
-#endif
-#ifndef CALL_FROM_TB3
-#define CALL_FROM_TB3(func, arg0, arg1, arg2) func(arg0, arg1, arg2)
-#endif
-#ifndef CALL_FROM_TB4
-#define CALL_FROM_TB4(func, arg0, arg1, arg2, arg3) \
- func(arg0, arg1, arg2, arg3)
-#endif
-
-/* Load and store */
-#define MEMSUFFIX _raw
-#include "op_mem.c"
-#undef MEMSUFFIX
-#if !defined(CONFIG_USER_ONLY)
-#define MEMSUFFIX _user
-#include "op_mem.c"
-#undef MEMSUFFIX
-
-#define MEMSUFFIX _super
-#include "op_mem.c"
-#undef MEMSUFFIX
-
-#define MEMSUFFIX _kernel
-#include "op_mem.c"
-#undef MEMSUFFIX
-#endif
/* 64 bits arithmetic */
#if TARGET_LONG_BITS > HOST_LONG_BITS
Index: qemu-work/target-mips/op_mem.c
===================================================================
--- qemu-work.orig/target-mips/op_mem.c 2008-06-16 07:32:12.000000000 +0100
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
@@ -1,269 +0,0 @@
-/*
- * MIPS emulation memory micro-operations for qemu.
- *
- * Copyright (c) 2004-2005 Jocelyn Mayer
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/* "half" load and stores. We must do the memory access inline,
- or fault handling won't work. */
-
-#ifdef TARGET_WORDS_BIGENDIAN
-#define GET_LMASK(v) ((v) & 3)
-#define GET_OFFSET(addr, offset) (addr + (offset))
-#else
-#define GET_LMASK(v) (((v) & 3) ^ 3)
-#define GET_OFFSET(addr, offset) (addr - (offset))
-#endif
-
-void glue(op_lwl, MEMSUFFIX) (void)
-{
- target_ulong tmp;
-
- tmp = glue(ldub, MEMSUFFIX)(T0);
- T1 = (T1 & 0x00FFFFFF) | (tmp << 24);
-
- if (GET_LMASK(T0) <= 2) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 1));
- T1 = (T1 & 0xFF00FFFF) | (tmp << 16);
- }
-
- if (GET_LMASK(T0) <= 1) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 2));
- T1 = (T1 & 0xFFFF00FF) | (tmp << 8);
- }
-
- if (GET_LMASK(T0) == 0) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 3));
- T1 = (T1 & 0xFFFFFF00) | tmp;
- }
- T1 = (int32_t)T1;
- FORCE_RET();
-}
-
-void glue(op_lwr, MEMSUFFIX) (void)
-{
- target_ulong tmp;
-
- tmp = glue(ldub, MEMSUFFIX)(T0);
- T1 = (T1 & 0xFFFFFF00) | tmp;
-
- if (GET_LMASK(T0) >= 1) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -1));
- T1 = (T1 & 0xFFFF00FF) | (tmp << 8);
- }
-
- if (GET_LMASK(T0) >= 2) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -2));
- T1 = (T1 & 0xFF00FFFF) | (tmp << 16);
- }
-
- if (GET_LMASK(T0) == 3) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -3));
- T1 = (T1 & 0x00FFFFFF) | (tmp << 24);
- }
- T1 = (int32_t)T1;
- FORCE_RET();
-}
-
-void glue(op_swl, MEMSUFFIX) (void)
-{
- glue(stb, MEMSUFFIX)(T0, (uint8_t)(T1 >> 24));
-
- if (GET_LMASK(T0) <= 2)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 1), (uint8_t)(T1 >> 16));
-
- if (GET_LMASK(T0) <= 1)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 2), (uint8_t)(T1 >> 8));
-
- if (GET_LMASK(T0) == 0)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 3), (uint8_t)T1);
-
- FORCE_RET();
-}
-
-void glue(op_swr, MEMSUFFIX) (void)
-{
- glue(stb, MEMSUFFIX)(T0, (uint8_t)T1);
-
- if (GET_LMASK(T0) >= 1)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -1), (uint8_t)(T1 >> 8));
-
- if (GET_LMASK(T0) >= 2)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -2), (uint8_t)(T1 >> 16));
-
- if (GET_LMASK(T0) == 3)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -3), (uint8_t)(T1 >> 24));
-
- FORCE_RET();
-}
-
-#if defined(TARGET_MIPS64)
-/* "half" load and stores. We must do the memory access inline,
- or fault handling won't work. */
-
-#ifdef TARGET_WORDS_BIGENDIAN
-#define GET_LMASK64(v) ((v) & 7)
-#else
-#define GET_LMASK64(v) (((v) & 7) ^ 7)
-#endif
-
-void glue(op_ldl, MEMSUFFIX) (void)
-{
- uint64_t tmp;
-
- tmp = glue(ldub, MEMSUFFIX)(T0);
- T1 = (T1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
-
- if (GET_LMASK64(T0) <= 6) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 1));
- T1 = (T1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
- }
-
- if (GET_LMASK64(T0) <= 5) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 2));
- T1 = (T1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
- }
-
- if (GET_LMASK64(T0) <= 4) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 3));
- T1 = (T1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
- }
-
- if (GET_LMASK64(T0) <= 3) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 4));
- T1 = (T1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
- }
-
- if (GET_LMASK64(T0) <= 2) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 5));
- T1 = (T1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
- }
-
- if (GET_LMASK64(T0) <= 1) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 6));
- T1 = (T1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
- }
-
- if (GET_LMASK64(T0) == 0) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 7));
- T1 = (T1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
- }
-
- FORCE_RET();
-}
-
-void glue(op_ldr, MEMSUFFIX) (void)
-{
- uint64_t tmp;
-
- tmp = glue(ldub, MEMSUFFIX)(T0);
- T1 = (T1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
-
- if (GET_LMASK64(T0) >= 1) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -1));
- T1 = (T1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
- }
-
- if (GET_LMASK64(T0) >= 2) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -2));
- T1 = (T1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
- }
-
- if (GET_LMASK64(T0) >= 3) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -3));
- T1 = (T1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
- }
-
- if (GET_LMASK64(T0) >= 4) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -4));
- T1 = (T1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
- }
-
- if (GET_LMASK64(T0) >= 5) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -5));
- T1 = (T1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
- }
-
- if (GET_LMASK64(T0) >= 6) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -6));
- T1 = (T1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
- }
-
- if (GET_LMASK64(T0) == 7) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -7));
- T1 = (T1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
- }
-
- FORCE_RET();
-}
-
-void glue(op_sdl, MEMSUFFIX) (void)
-{
- glue(stb, MEMSUFFIX)(T0, (uint8_t)(T1 >> 56));
-
- if (GET_LMASK64(T0) <= 6)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 1), (uint8_t)(T1 >> 48));
-
- if (GET_LMASK64(T0) <= 5)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 2), (uint8_t)(T1 >> 40));
-
- if (GET_LMASK64(T0) <= 4)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 3), (uint8_t)(T1 >> 32));
-
- if (GET_LMASK64(T0) <= 3)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 4), (uint8_t)(T1 >> 24));
-
- if (GET_LMASK64(T0) <= 2)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 5), (uint8_t)(T1 >> 16));
-
- if (GET_LMASK64(T0) <= 1)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 6), (uint8_t)(T1 >> 8));
-
- if (GET_LMASK64(T0) <= 0)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 7), (uint8_t)T1);
-
- FORCE_RET();
-}
-
-void glue(op_sdr, MEMSUFFIX) (void)
-{
- glue(stb, MEMSUFFIX)(T0, (uint8_t)T1);
-
- if (GET_LMASK64(T0) >= 1)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -1), (uint8_t)(T1 >> 8));
-
- if (GET_LMASK64(T0) >= 2)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -2), (uint8_t)(T1 >> 16));
-
- if (GET_LMASK64(T0) >= 3)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -3), (uint8_t)(T1 >> 24));
-
- if (GET_LMASK64(T0) >= 4)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -4), (uint8_t)(T1 >> 32));
-
- if (GET_LMASK64(T0) >= 5)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -5), (uint8_t)(T1 >> 40));
-
- if (GET_LMASK64(T0) >= 6)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -6), (uint8_t)(T1 >> 48));
-
- if (GET_LMASK64(T0) == 7)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -7), (uint8_t)(T1 >> 56));
-
- FORCE_RET();
-}
-#endif /* TARGET_MIPS64 */
Index: qemu-work/target-mips/translate.c
===================================================================
--- qemu-work.orig/target-mips/translate.c 2008-06-17 06:43:05.000000000 +0100
+++ qemu-work/target-mips/translate.c 2008-06-17 06:43:36.000000000 +0100
@@ -930,37 +930,6 @@
}
/* load/store instructions. */
-#if defined(CONFIG_USER_ONLY)
-#define op_ldst(name) gen_op_##name##_raw()
-#define OP_LD_TABLE(width)
-#define OP_ST_TABLE(width)
-#else
-#define op_ldst(name) (*gen_op_##name[ctx->mem_idx])()
-#define OP_LD_TABLE(width) \
-static GenOpFunc *gen_op_l##width[] = { \
- &gen_op_l##width##_kernel, \
- &gen_op_l##width##_super, \
- &gen_op_l##width##_user, \
-}
-#define OP_ST_TABLE(width) \
-static GenOpFunc *gen_op_s##width[] = { \
- &gen_op_s##width##_kernel, \
- &gen_op_s##width##_super, \
- &gen_op_s##width##_user, \
-}
-#endif
-
-#if defined(TARGET_MIPS64)
-OP_LD_TABLE(dl);
-OP_LD_TABLE(dr);
-OP_ST_TABLE(dl);
-OP_ST_TABLE(dr);
-#endif
-OP_LD_TABLE(wl);
-OP_LD_TABLE(wr);
-OP_ST_TABLE(wl);
-OP_ST_TABLE(wr);
-
#define OP_LD(insn,fname) \
void inline op_ldst_##insn(DisasContext *ctx) \
{ \
@@ -1032,6 +1001,486 @@
#endif
#undef OP_ST_ATOMIC
+/* "half" load and stores. We must do the memory access inline,
+ or fault handling won't work. */
+void inline get_lmask (TCGv ret, TCGv val)
+{ /* Emit ops setting ret to the low two bits of val (inverted on little-endian targets); TCG form of the removed GET_LMASK macro. */
+ tcg_gen_andi_tl(ret, val, 3); /* ret = val & 3 */
+#ifndef TARGET_WORDS_BIGENDIAN
+ tcg_gen_xori_tl(ret, ret, 3); /* little-endian only: ret = (val & 3) ^ 3 */
+#endif
+}
+
+void inline get_offset (TCGv ret, TCGv val, target_ulong off)
+{ /* Emit ret = val + off on big-endian targets and ret = val - off otherwise; TCG form of the removed GET_OFFSET macro. */
+#ifdef TARGET_WORDS_BIGENDIAN
+ tcg_gen_addi_tl(ret, val, off); /* callers in this patch also pass negative constants (e.g. -1), converted to the unsigned parameter type */
+#else
+ tcg_gen_subi_tl(ret, val, off);
+#endif
+}
+
+void inline gen_lwl (DisasContext *ctx)
+{ /* Emit TCG ops for LWL: merge bytes loaded starting at address cpu_T[0] into cpu_T[1], most significant byte first. */
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ /* The byte at cpu_T[0] is always loaded and placed in bits 31..24. */
+ get_lmask(r_mask, cpu_T[0]);
+ tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ /* Byte at offset 1 (bits 23..16): skipped when r_mask > 2. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l1);
+ get_offset(r_tmp2, cpu_T[0], 1);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l1);
+ /* Byte at offset 2 (bits 15..8): skipped when r_mask > 1. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l2);
+ get_offset(r_tmp2, cpu_T[0], 2);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l2);
+ /* Byte at offset 3 (bits 7..0): loaded only when r_mask == 0. */
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l3);
+ get_offset(r_tmp2, cpu_T[0], 3);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp1);
+ gen_set_label(l3);
+ /* Release the temporaries, then sign-extend the 32-bit result in cpu_T[1]. */
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+ tcg_temp_free(r_tmp3);
+ tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+}
+
+void inline gen_lwr (DisasContext *ctx)
+{ /* Emit TCG ops for LWR: merge bytes loaded ending at address cpu_T[0] into cpu_T[1], least significant byte first. */
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ /* The byte at cpu_T[0] is always loaded and placed in bits 7..0. */
+ get_lmask(r_mask, cpu_T[0]);
+ tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp1);
+ /* Byte at offset -1 (bits 15..8): skipped when r_mask < 1. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+ get_offset(r_tmp2, cpu_T[0], -1);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l1);
+ /* Byte at offset -2 (bits 23..16): skipped when r_mask < 2. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+ get_offset(r_tmp2, cpu_T[0], -2);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l2);
+ /* Byte at offset -3 (bits 31..24): loaded only when r_mask == 3. */
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 3, l3);
+ get_offset(r_tmp2, cpu_T[0], -3);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l3);
+ /* Release the temporaries, then sign-extend the 32-bit result in cpu_T[1]. */
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+ tcg_temp_free(r_tmp3);
+ tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+}
+
+void inline gen_swl (DisasContext *ctx)
+{ /* Emit TCG ops for SWL: store bytes of cpu_T[1] starting at address cpu_T[0], most significant byte first. */
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ /* cpu_T[1] >> 24 is always stored at cpu_T[0]. */
+ get_lmask(r_mask, cpu_T[0]);
+ tcg_gen_shri_tl(r_tmp1, cpu_T[1], 24);
+ tcg_gen_qemu_st8(r_tmp1, cpu_T[0], ctx->mem_idx);
+ /* cpu_T[1] >> 16 goes to offset 1: skipped when r_mask > 2. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l1);
+ get_offset(r_tmp1, cpu_T[0], 1);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l1);
+ /* cpu_T[1] >> 8 goes to offset 2: skipped when r_mask > 1. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l2);
+ get_offset(r_tmp1, cpu_T[0], 2);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l2);
+ /* Unshifted cpu_T[1] goes to offset 3: stored only when r_mask == 0. */
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l3);
+ get_offset(r_tmp1, cpu_T[0], 3);
+ tcg_gen_qemu_st8(cpu_T[1], r_tmp1, ctx->mem_idx);
+ gen_set_label(l3);
+ /* Release the temporaries. */
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+}
+
+void inline gen_swr (DisasContext *ctx)
+{ /* Emit TCG ops for SWR: store bytes of cpu_T[1] ending at address cpu_T[0], least significant byte first. */
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ /* Unshifted cpu_T[1] is always stored at cpu_T[0]. */
+ get_lmask(r_mask, cpu_T[0]);
+ tcg_gen_qemu_st8(cpu_T[1], cpu_T[0], ctx->mem_idx);
+ /* cpu_T[1] >> 8 goes to offset -1: skipped when r_mask < 1. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+ get_offset(r_tmp1, cpu_T[0], -1);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l1);
+ /* cpu_T[1] >> 16 goes to offset -2: skipped when r_mask < 2. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+ get_offset(r_tmp1, cpu_T[0], -2);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l2);
+ /* cpu_T[1] >> 24 goes to offset -3: stored only when r_mask == 3. */
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 3, l3);
+ get_offset(r_tmp1, cpu_T[0], -3);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 24);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l3);
+ /* Release the temporaries. */
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+}
+
+#if defined(TARGET_MIPS64)
+
+void inline get_lmask64 (TCGv ret, TCGv val)
+{ /* Emit ops setting ret to the low three bits of val (inverted on little-endian targets); TCG form of the removed GET_LMASK64 macro. */
+ tcg_gen_andi_tl(ret, val, 7); /* ret = val & 7 */
+#ifndef TARGET_WORDS_BIGENDIAN
+ tcg_gen_xori_tl(ret, ret, 7); /* little-endian only: ret = (val & 7) ^ 7 */
+#endif
+}
+
+void inline gen_ldl (DisasContext *ctx)
+{ /* Emit TCG ops for LDL: merge bytes loaded starting at address cpu_T[0] into cpu_T[1], most significant byte first. */
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ int l4 = gen_new_label();
+ int l5 = gen_new_label();
+ int l6 = gen_new_label();
+ int l7 = gen_new_label();
+ /* The byte at cpu_T[0] is always loaded and placed in bits 63..56. */
+ get_lmask64(r_mask, cpu_T[0]);
+ tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 56);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ /* Byte at offset 1 (bits 55..48): skipped when r_mask > 6. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 6, l1);
+ get_offset(r_tmp2, cpu_T[0], 1);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 48);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l1);
+ /* Byte at offset 2 (bits 47..40): skipped when r_mask > 5. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 5, l2);
+ get_offset(r_tmp2, cpu_T[0], 2);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 40);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l2);
+ /* Byte at offset 3 (bits 39..32): skipped when r_mask > 4. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 4, l3);
+ get_offset(r_tmp2, cpu_T[0], 3);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 32);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00ffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l3);
+ /* Byte at offset 4 (bits 31..24): skipped when r_mask > 3. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 3, l4);
+ get_offset(r_tmp2, cpu_T[0], 4);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffff00ffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l4);
+ /* Byte at offset 5 (bits 23..16): skipped when r_mask > 2. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l5);
+ get_offset(r_tmp2, cpu_T[0], 5);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffff00ffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l5);
+ /* Byte at offset 6 (bits 15..8): skipped when r_mask > 1. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l6);
+ get_offset(r_tmp2, cpu_T[0], 6);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffffff00ffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l6);
+ /* Byte at offset 7 (bits 7..0): loaded only when r_mask == 0. */
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l7);
+ get_offset(r_tmp2, cpu_T[0], 7);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffffffff00ULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp1);
+ gen_set_label(l7);
+ /* Release the temporaries; no sign extension is emitted for the 64-bit result. */
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+ tcg_temp_free(r_tmp3);
+}
+
+void inline gen_ldr (DisasContext *ctx)
+{ /* Emit TCG ops for LDR: merge bytes loaded ending at address cpu_T[0] into cpu_T[1], least significant byte first. */
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ int l4 = gen_new_label();
+ int l5 = gen_new_label();
+ int l6 = gen_new_label();
+ int l7 = gen_new_label();
+ /* The byte at cpu_T[0] is always loaded and placed in bits 7..0 (this stanza masks into r_tmp2, the later ones into r_tmp3). */
+ get_lmask64(r_mask, cpu_T[0]);
+ tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+ tcg_gen_andi_tl(r_tmp2, cpu_T[1], 0xffffffffffffff00ULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp2, r_tmp1);
+ /* Byte at offset -1 (bits 15..8): skipped when r_mask < 1. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+ get_offset(r_tmp2, cpu_T[0], -1);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffffff00ffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l1);
+ /* Byte at offset -2 (bits 23..16): skipped when r_mask < 2. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+ get_offset(r_tmp2, cpu_T[0], -2);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffff00ffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l2);
+ /* Byte at offset -3 (bits 31..24): skipped when r_mask < 3. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 3, l3);
+ get_offset(r_tmp2, cpu_T[0], -3);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffff00ffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l3);
+ /* Byte at offset -4 (bits 39..32): skipped when r_mask < 4. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 4, l4);
+ get_offset(r_tmp2, cpu_T[0], -4);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 32);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00ffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l4);
+ /* Byte at offset -5 (bits 47..40): skipped when r_mask < 5. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 5, l5);
+ get_offset(r_tmp2, cpu_T[0], -5);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 40);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l5);
+ /* Byte at offset -6 (bits 55..48): skipped when r_mask < 6. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 6, l6);
+ get_offset(r_tmp2, cpu_T[0], -6);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 48);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l6);
+ /* Byte at offset -7 (bits 63..56): loaded only when r_mask == 7. */
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 7, l7);
+ get_offset(r_tmp2, cpu_T[0], -7);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 56);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l7);
+ /* Release the temporaries; no sign extension is emitted for the 64-bit result. */
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+ tcg_temp_free(r_tmp3);
+}
+
+void inline gen_sdl (DisasContext *ctx)
+{ /* Emit TCG ops for SDL: store bytes of cpu_T[1] starting at address cpu_T[0], most significant byte first. */
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ int l4 = gen_new_label();
+ int l5 = gen_new_label();
+ int l6 = gen_new_label();
+ int l7 = gen_new_label();
+ /* cpu_T[1] >> 56 is always stored at cpu_T[0]; the skip thresholds below must descend 6,5,4,3,2,1,0 as in op_sdl. */
+ get_lmask64(r_mask, cpu_T[0]);
+ tcg_gen_shri_tl(r_tmp1, cpu_T[1], 56);
+ tcg_gen_qemu_st8(r_tmp1, cpu_T[0], ctx->mem_idx);
+ /* cpu_T[1] >> 48 goes to offset 1: skipped when r_mask > 6. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 6, l1);
+ get_offset(r_tmp1, cpu_T[0], 1);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 48);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l1);
+ /* cpu_T[1] >> 40 goes to offset 2: skipped when r_mask > 5. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 5, l2);
+ get_offset(r_tmp1, cpu_T[0], 2);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 40);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l2);
+ /* cpu_T[1] >> 32 goes to offset 3: skipped when r_mask > 4. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 4, l3);
+ get_offset(r_tmp1, cpu_T[0], 3);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 32);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l3);
+ /* cpu_T[1] >> 24 goes to offset 4: skipped when r_mask > 3. */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 3, l4);
+ get_offset(r_tmp1, cpu_T[0], 4);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 24);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l4);
+ /* cpu_T[1] >> 16 goes to offset 5: skipped when r_mask > 2 (op_sdl: lmask <= 2). */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l5);
+ get_offset(r_tmp1, cpu_T[0], 5);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l5);
+ /* cpu_T[1] >> 8 goes to offset 6: skipped when r_mask > 1 (op_sdl: lmask <= 1). */
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l6);
+ get_offset(r_tmp1, cpu_T[0], 6);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l6);
+ /* Unshifted cpu_T[1] goes to offset 7: stored only when r_mask == 0 (op_sdl: lmask <= 0). */
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l7);
+ get_offset(r_tmp1, cpu_T[0], 7);
+ tcg_gen_qemu_st8(cpu_T[1], r_tmp1, ctx->mem_idx);
+ gen_set_label(l7);
+ /* Release the temporaries. */
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+}
+
+void inline gen_sdr (DisasContext *ctx)
+{ /* Emit TCG ops for SDR: store bytes of cpu_T[1] ending at address cpu_T[0], least significant byte first. */
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ int l4 = gen_new_label();
+ int l5 = gen_new_label();
+ int l6 = gen_new_label();
+ int l7 = gen_new_label();
+ /* Unshifted cpu_T[1] is always stored at cpu_T[0]. */
+ get_lmask64(r_mask, cpu_T[0]);
+ tcg_gen_qemu_st8(cpu_T[1], cpu_T[0], ctx->mem_idx);
+ /* cpu_T[1] >> 8 goes to offset -1: skipped when r_mask < 1. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+ get_offset(r_tmp1, cpu_T[0], -1);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l1);
+ /* cpu_T[1] >> 16 goes to offset -2: skipped when r_mask < 2. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+ get_offset(r_tmp1, cpu_T[0], -2);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l2);
+ /* cpu_T[1] >> 24 goes to offset -3: skipped when r_mask < 3. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 3, l3);
+ get_offset(r_tmp1, cpu_T[0], -3);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 24);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l3);
+ /* cpu_T[1] >> 32 goes to offset -4: skipped when r_mask < 4. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 4, l4);
+ get_offset(r_tmp1, cpu_T[0], -4);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 32);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l4);
+ /* cpu_T[1] >> 40 goes to offset -5: skipped when r_mask < 5. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 5, l5);
+ get_offset(r_tmp1, cpu_T[0], -5);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 40);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l5);
+ /* cpu_T[1] >> 48 goes to offset -6: skipped when r_mask < 6. */
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 6, l6);
+ get_offset(r_tmp1, cpu_T[0], -6);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 48);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l6);
+ /* cpu_T[1] >> 56 goes to offset -7: stored only when r_mask == 7. */
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 7, l7);
+ get_offset(r_tmp1, cpu_T[0], -7);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 56);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l7);
+ /* Release the temporaries. */
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+}
+#endif /* TARGET_MIPS64 */
+
/* Load and store */
static void gen_ldst (DisasContext *ctx, uint32_t opc, int rt,
int base, int16_t offset)
@@ -1080,24 +1529,24 @@
break;
case OPC_LDL:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(ldl);
+ gen_ldl(ctx);
gen_store_gpr(cpu_T[1], rt);
opn = "ldl";
break;
case OPC_SDL:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(sdl);
+ gen_sdl(ctx);
opn = "sdl";
break;
case OPC_LDR:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(ldr);
+ gen_ldr(ctx);
gen_store_gpr(cpu_T[1], rt);
opn = "ldr";
break;
case OPC_SDR:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(sdr);
+ gen_sdr(ctx);
opn = "sdr";
break;
#endif
@@ -1143,24 +1592,24 @@
break;
case OPC_LWL:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(lwl);
+ gen_lwl(ctx);
gen_store_gpr(cpu_T[1], rt);
opn = "lwl";
break;
case OPC_SWL:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(swl);
- opn = "swr";
+ gen_swl(ctx);
+ opn = "swl";
break;
case OPC_LWR:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(lwr);
+ gen_lwr(ctx);
gen_store_gpr(cpu_T[1], rt);
opn = "lwr";
break;
case OPC_SWR:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(swr);
+ gen_swr(ctx);
opn = "swr";
break;
case OPC_LL:
Index: qemu-work/exec-all.h
===================================================================
--- qemu-work.orig/exec-all.h 2008-06-17 06:47:25.000000000 +0100
+++ qemu-work/exec-all.h 2008-06-17 06:47:53.000000000 +0100
@@ -30,7 +30,7 @@
struct TranslationBlock;
/* XXX: make safe guess about sizes */
-#define MAX_OP_PER_INSTR 64
+#define MAX_OP_PER_INSTR 256
/* A Call op needs up to 6 + 2N parameters (N = number of arguments). */
#define MAX_OPC_PARAM 10
#define OPC_BUF_SIZE 512
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [Qemu-devel] Wanted: A better way to implement MIPS unaligned instructions
2008-06-17 12:12 [Qemu-devel] Wanted: A better way to implement MIPS unaligned instructions Thiemo Seufer
@ 2008-06-17 21:56 ` Fabrice Bellard
0 siblings, 0 replies; 2+ messages in thread
From: Fabrice Bellard @ 2008-06-17 21:56 UTC (permalink / raw)
To: qemu-devel
Thiemo Seufer wrote:
> Hello All,
>
> I am currently trying to implement TCG versions of the MIPS [ls][dw][lr]
> set of instructions. I believe I can't use a helper function for
> load/store type instructions. The appended patch uses TCG directly, but
> the resulting translation is excessively complicated. Is there a better
> way to do this?
A simpler solution is to use the same code as before in a helper.
Helpers are perfectly usable in this case provided the cpu state is
saved before calling them.
Once the helper solution works, it will be possible to optimize it
by directly accessing the QEMU TLB cache to do word accesses and to
generate the correct exception.
Fabrice.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2008-06-17 21:57 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-17 12:12 [Qemu-devel] Wanted: A better way to implement MIPS unaligned instructions Thiemo Seufer
2008-06-17 21:56 ` Fabrice Bellard
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).