From: TeLeMan <geleman@gmail.com>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] Re: [PATCH] tcg, tci: Add TCG and interpreter for bytecode (virtual machine)
Date: Fri, 23 Oct 2009 09:31:29 +0800 [thread overview]
Message-ID: <a38b25540910221831u1f43a337s8863b93930f275e9@mail.gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 123 bytes --]
Tested with i386-softmmu only. With these patches, tci can now run Windows XP SP2;
it is about 6 times slower than the JIT.
--
SUN OF A BEACH
[-- Attachment #2: 0001-tci-fix-op_sar_iXX-and-op_ext16s_iXX.patch --]
[-- Type: text/plain, Size: 1333 bytes --]
Subject: [PATCH 1/5] tci: fix op_sar_iXX and op_ext16s_iXX
---
tcg/tci.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/tcg/tci.c b/tcg/tci.c
index e467b3a..81c415c 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -206,7 +206,7 @@ static uint16_t tci_read_r16(uint8_t **tb_ptr)
}
/* Read indexed register (16 bit signed) from bytecode. */
-static uint16_t tci_read_r16s(uint8_t **tb_ptr)
+static int16_t tci_read_r16s(uint8_t **tb_ptr)
{
uint16_t value = tci_read_reg16s(**tb_ptr);
*tb_ptr += 1;
@@ -549,7 +549,7 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
t0 = *tb_ptr++;
t1 = tci_read_ri32(&tb_ptr);
t2 = tci_read_ri32(&tb_ptr);
- tci_write_reg32(t0, (t1 >> t2) | (t1 & (1UL << 31)));
+ tci_write_reg32(t0, ((int32_t)t1 >> t2));
break;
#ifdef TCG_TARGET_HAS_rot_i32
case INDEX_op_rotl_i32:
@@ -794,7 +794,7 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
t0 = *tb_ptr++;
t1 = tci_read_ri64(&tb_ptr);
t2 = tci_read_ri64(&tb_ptr);
- tci_write_reg64(t0, (t1 >> t2) | (t1 & (1ULL << 63)));
+ tci_write_reg64(t0, ((int64_t)t1 >> t2));
break;
#ifdef TCG_TARGET_HAS_rot_i64
case INDEX_op_rotl_i64:
--
1.6.3.msysgit.0
[-- Attachment #3: 0002-tci-add-bswapXX_i32-div_i32-and-rot_i32.patch --]
[-- Type: text/plain, Size: 4270 bytes --]
Subject: [PATCH 2/5] tci: add bswapXX_i32,div_i32 and rot_i32
---
tcg/bytecode/tcg-target.c | 24 +++++++++++++++++++++++-
tcg/tci.c | 40 +++++++++++++++++++++++++++++++++++-----
2 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/tcg/bytecode/tcg-target.c b/tcg/bytecode/tcg-target.c
index 2bd12b8..aae570f 100644
--- a/tcg/bytecode/tcg-target.c
+++ b/tcg/bytecode/tcg-target.c
@@ -722,6 +722,10 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
case INDEX_op_shl_i32:
case INDEX_op_shr_i32:
case INDEX_op_sar_i32:
+#ifdef TCG_TARGET_HAS_rot_i32
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotr_i32:
+#endif
tcg_out_op_t(s, opc);
tcg_out_r(s, args[0]);
tcg_out_ri32(s, const_args[1], args[1]);
@@ -816,7 +820,10 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
case INDEX_op_divu_i32:
case INDEX_op_rem_i32:
case INDEX_op_remu_i32:
- TODO();
+ tcg_out_op_t(s, opc);
+ tcg_out_r(s, args[0]);
+ tcg_out_ri32(s, const_args[1], args[1]);
+ tcg_out_ri32(s, const_args[2], args[2]);
break;
#else
case INDEX_op_div2_i32:
@@ -1002,6 +1009,21 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
break;
#endif
#endif /* TCG_TARGET_REG_BITS == 64 */
+#if defined(TCG_TARGET_HAS_bswap32_i32)
+ case INDEX_op_bswap32_i32:
+ tcg_out_op_t(s, opc);
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ break;
+#endif
+#if defined(TCG_TARGET_HAS_bswap16_i32)
+ case INDEX_op_bswap16_i32:
+ tcg_dump_ops(s, stderr);
+ tcg_out_op_t(s, opc);
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ break;
+#endif
case INDEX_op_end:
TODO();
break;
diff --git a/tcg/tci.c b/tcg/tci.c
index 81c415c..8bb78e3 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -503,11 +503,29 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
break;
#ifdef TCG_TARGET_HAS_div_i32
case INDEX_op_div_i32:
+ t0 = *tb_ptr++;
+ t1 = tci_read_ri32(&tb_ptr);
+ t2 = tci_read_ri32(&tb_ptr);
+ tci_write_reg32(t0, (int32_t)t1 / (int32_t)t2);
+ break;
case INDEX_op_divu_i32:
+ t0 = *tb_ptr++;
+ t1 = tci_read_ri32(&tb_ptr);
+ t2 = tci_read_ri32(&tb_ptr);
+ tci_write_reg32(t0, t1 / t2);
+ break;
case INDEX_op_rem_i32:
+ t0 = *tb_ptr++;
+ t1 = tci_read_ri32(&tb_ptr);
+ t2 = tci_read_ri32(&tb_ptr);
+ tci_write_reg32(t0, (int32_t)t1 % (int32_t)t2);
+ break;
case INDEX_op_remu_i32:
- TODO();
- break;
+ t0 = *tb_ptr++;
+ t1 = tci_read_ri32(&tb_ptr);
+ t2 = tci_read_ri32(&tb_ptr);
+ tci_write_reg32(t0, t1 % t2);
+ break;
#else
case INDEX_op_div2_i32:
case INDEX_op_divu2_i32:
@@ -553,8 +571,16 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
break;
#ifdef TCG_TARGET_HAS_rot_i32
case INDEX_op_rotl_i32:
+ t0 = *tb_ptr++;
+ t1 = tci_read_ri32(&tb_ptr);
+ t2 = tci_read_ri32(&tb_ptr);
+ tci_write_reg32(t0, (t1<<t2)|(t1>>(32-t2)));
+ break;
case INDEX_op_rotr_i32:
- TODO();
+ t0 = *tb_ptr++;
+ t1 = tci_read_ri32(&tb_ptr);
+ t2 = tci_read_ri32(&tb_ptr);
+ tci_write_reg32(t0, (t1>>t2)|(t1<<(32-t2)));
break;
#endif
case INDEX_op_brcond_i32:
@@ -640,12 +666,16 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
#endif
#ifdef TCG_TARGET_HAS_bswap16_i32
case INDEX_op_bswap16_i32:
- TODO();
+ t0 = *tb_ptr++;
+ t1 = tci_read_r16(&tb_ptr);
+ tci_write_reg32(t0, bswap16(t1));
break;
#endif
#ifdef TCG_TARGET_HAS_bswap32_i32
case INDEX_op_bswap32_i32:
- TODO();
+ t0 = *tb_ptr++;
+ t1 = tci_read_r32(&tb_ptr);
+ tci_write_reg32(t0, bswap32(t1));
break;
#endif
#ifdef TCG_TARGET_HAS_not_i32
--
1.6.3.msysgit.0
[-- Attachment #4: 0003-tci-support-GETPC-for-SOFTMMU.patch --]
[-- Type: text/plain, Size: 1504 bytes --]
Subject: [PATCH 3/5] tci: support GETPC() for SOFTMMU
---
dyngen-exec.h | 5 ++++-
tcg/tci.c | 7 +++++++
2 files changed, 11 insertions(+), 1 deletions(-)
diff --git a/dyngen-exec.h b/dyngen-exec.h
index d5620ca..ba213c4 100644
--- a/dyngen-exec.h
+++ b/dyngen-exec.h
@@ -119,7 +119,10 @@ extern int printf(const char *, ...);
/* The return address may point to the start of the next instruction.
Subtracting one gets us the call instruction itself. */
-#if defined(__s390__)
+#if defined(CONFIG_TCG_INTERPRETER)
+extern uint8_t * tci_tb_ptr;
+# define GETPC() ((void *)tci_tb_ptr)
+#elif defined(__s390__)
# define GETPC() ((void*)(((unsigned long)__builtin_return_address(0) & 0x7fffffffUL) - 1))
#elif defined(__arm__)
/* Thumb return addresses have the low bit set, so we need to subtract two.
diff --git a/tcg/tci.c b/tcg/tci.c
index 8bb78e3..0ba605b 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -50,6 +50,10 @@ struct CPUX86State *env;
#error Target support missing, please fix!
#endif
+#ifdef CONFIG_SOFTMMU
+uint8_t * tci_tb_ptr;
+#endif
+
static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS];
static tcg_target_ulong tci_read_reg(uint32_t index)
@@ -380,6 +384,9 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
tci_reg[TCG_AREG0] = (tcg_target_ulong)env;
for (;;) {
+#ifdef CONFIG_SOFTMMU
+ tci_tb_ptr=tb_ptr;
+#endif
uint8_t opc = *(uint8_t *)tb_ptr++;
tcg_target_ulong t0;
tcg_target_ulong t1;
--
1.6.3.msysgit.0
[-- Attachment #5: 0004-tci-new-op_call-implementation-for-tci.patch --]
[-- Type: text/plain, Size: 19811 bytes --]
Subject: [PATCH 4/5] tci: new op_call implementation for tci
---
tcg/bytecode/tcg-target.c | 191 ++++++++++++++++++++++++++++++++++++++++++++-
tcg/tcg-opc.h | 25 ++++++
tcg/tcg.c | 139 ++++++++++++++++++++++++++++++++
tcg/tci.c | 156 ++++++++++++++++++++++++++++++++++--
4 files changed, 500 insertions(+), 11 deletions(-)
diff --git a/tcg/bytecode/tcg-target.c b/tcg/bytecode/tcg-target.c
index aae570f..744b9e6 100644
--- a/tcg/bytecode/tcg-target.c
+++ b/tcg/bytecode/tcg-target.c
@@ -248,6 +248,28 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
{ INDEX_op_bswap32_i32, { "r", "r" } },
#endif
+ { INDEX_op_call0_r0, { "i"} },
+ { INDEX_op_call1_r0, { "i","ri"} },
+ { INDEX_op_call2_r0, { "i","ri","ri"} },
+ { INDEX_op_call3_r0, { "i","ri","ri","ri"} },
+ { INDEX_op_call4_r0, { "i","ri","ri","ri","ri"} },
+
+ { INDEX_op_call0_r1, { "i","r"} },
+ { INDEX_op_call1_r1, { "i","ri","r"} },
+ { INDEX_op_call2_r1, { "i","ri","ri","r"} },
+ { INDEX_op_call3_r1, { "i","ri","ri","ri","r"} },
+ { INDEX_op_call4_r1, { "i","ri","ri","ri","ri","r"} },
+
+#if TCG_TARGET_REG_BITS == 32
+
+ { INDEX_op_call0_r2, { "i","r","r"} },
+ { INDEX_op_call1_r2, { "i","ri","r","r"} },
+ { INDEX_op_call2_r2, { "i","ri","ri","r","r"} },
+ { INDEX_op_call3_r2, { "i","ri","ri","ri","r","r"} },
+ { INDEX_op_call4_r2, { "i","ri","ri","ri","ri","r","r"} },
+
+#endif
+
{ -1 },
};
@@ -655,6 +677,172 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
}
}
+static void tcg_out_op_call(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ int nb_iargs=args[0]&0x0F;
+ int nb_oargs=args[0]>>4;
+
+ assert(const_args[1]!=0);
+
+ switch(nb_iargs)
+ {
+ case 0:
+ switch(nb_oargs)
+ {
+ case 0:
+ tcg_out_op_t(s, INDEX_op_call0_r0);
+ tcg_out_i(s, args[1]);/*func*/
+ break;
+ case 1:
+ tcg_out_op_t(s, INDEX_op_call0_r1);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_r(s, args[2]);/*r1*/
+ break;
+#if TCG_TARGET_REG_BITS == 32
+ case 2:
+ tcg_out_op_t(s, INDEX_op_call0_r2);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_r(s, args[2]);/*r1*/
+ tcg_out_r(s, args[3]);/*r2*/
+ break;
+#endif
+ default:
+ TODO();
+ }
+ break;
+ case 1:
+ switch(nb_oargs)
+ {
+ case 0:
+ tcg_out_op_t(s, INDEX_op_call1_r0);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg1*/
+ break;
+ case 1:
+ tcg_out_op_t(s, INDEX_op_call1_r1);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg1*/
+ tcg_out_r(s, args[3]);/*r1*/
+ break;
+#if TCG_TARGET_REG_BITS == 32
+ case 2:
+ tcg_out_op_t(s, INDEX_op_call1_r2);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg1*/
+ tcg_out_r(s, args[3]);/*r1*/
+ tcg_out_r(s, args[4]);/*r2*/
+ break;
+#endif
+ default:
+ TODO();
+ }
+ break;
+ case 2:
+ switch(nb_oargs)
+ {
+ case 0:
+ tcg_out_op_t(s, INDEX_op_call2_r0);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[3], args[3]);/*arg1*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg2*/
+ break;
+ case 1:
+ tcg_out_op_t(s, INDEX_op_call2_r1);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[3], args[3]);/*arg1*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg2*/
+ tcg_out_r(s, args[4]);/*r1*/
+ break;
+#if TCG_TARGET_REG_BITS == 32
+ case 2:
+ tcg_out_op_t(s, INDEX_op_call2_r2);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[3], args[3]);/*arg1*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg2*/
+ tcg_out_r(s, args[4]);/*r1*/
+ tcg_out_r(s, args[5]);/*r2*/
+ break;
+#endif
+ default:
+ TODO();
+ }
+ break;
+ case 3:
+ switch(nb_oargs)
+ {
+ case 0:
+ tcg_out_op_t(s, INDEX_op_call3_r0);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[4], args[4]);/*arg1*/
+ tcg_out_ri(s, const_args[3], args[3]);/*arg2*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg3*/
+ break;
+ case 1:
+ tcg_out_op_t(s, INDEX_op_call3_r1);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[4], args[4]);/*arg1*/
+ tcg_out_ri(s, const_args[3], args[3]);/*arg2*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg3*/
+ tcg_out_r(s, args[5]);/*r1*/
+ break;
+#if TCG_TARGET_REG_BITS == 32
+ case 2:
+ tcg_out_op_t(s, INDEX_op_call3_r2);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[4], args[4]);/*arg1*/
+ tcg_out_ri(s, const_args[3], args[3]);/*arg2*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg3*/
+ tcg_out_r(s, args[5]);/*r1*/
+ tcg_out_r(s, args[6]);/*r2*/
+ break;
+#endif
+ default:
+ TODO();
+ }
+ break;
+ case 4:
+ switch(nb_oargs)
+ {
+ case 0:
+ tcg_out_op_t(s, INDEX_op_call4_r0);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[5], args[5]);/*arg1*/
+ tcg_out_ri(s, const_args[4], args[4]);/*arg2*/
+ tcg_out_ri(s, const_args[3], args[3]);/*arg3*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg4*/
+ break;
+ case 1:
+ tcg_out_op_t(s, INDEX_op_call4_r1);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[5], args[5]);/*arg1*/
+ tcg_out_ri(s, const_args[4], args[4]);/*arg2*/
+ tcg_out_ri(s, const_args[3], args[3]);/*arg3*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg4*/
+ tcg_out_r(s, args[6]);/*r1*/
+ break;
+#if TCG_TARGET_REG_BITS == 32
+ case 2:
+ tcg_out_op_t(s, INDEX_op_call4_r2);
+ tcg_out_i(s, args[1]);/*func*/
+ tcg_out_ri(s, const_args[5], args[5]);/*arg1*/
+ tcg_out_ri(s, const_args[4], args[4]);/*arg2*/
+ tcg_out_ri(s, const_args[3], args[3]);/*arg3*/
+ tcg_out_ri(s, const_args[2], args[2]);/*arg4*/
+ tcg_out_r(s, args[6]);/*r1*/
+ tcg_out_r(s, args[7]);/*r2*/
+ break;
+#endif
+ default:
+ TODO();
+ }
+ break;
+ default:
+ TODO();
+ }
+}
+
+
static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
const int *const_args)
{
@@ -683,8 +871,7 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
tci_out_label(s, args[0]);
break;
case INDEX_op_call:
- tcg_out_op_t(s, opc);
- tcg_out_ri(s, const_args[0], args[0]);
+ tcg_out_op_call(s,args,const_args);
break;
case INDEX_op_jmp:
TODO();
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index b7f3fd7..070ba39 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -269,4 +269,29 @@ DEF2(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
#endif /* TCG_TARGET_REG_BITS != 32 */
+#ifdef CONFIG_TCG_INTERPRETER
+
+DEF2(call0_r0, 0, 0, 0, 0)
+DEF2(call1_r0, 0, 1, 0, 0)
+DEF2(call2_r0, 0, 2, 0, 0)
+DEF2(call3_r0, 0, 3, 0, 0)
+DEF2(call4_r0, 0, 4, 0, 0)
+DEF2(call0_r1, 1, 0, 0, 0)
+DEF2(call1_r1, 1, 1, 0, 0)
+DEF2(call2_r1, 1, 2, 0, 0)
+DEF2(call3_r1, 1, 3, 0, 0)
+DEF2(call4_r1, 1, 4, 0, 0)
+
+#if TCG_TARGET_REG_BITS == 32
+
+DEF2(call0_r2, 2, 0, 0, 0)
+DEF2(call1_r2, 2, 1, 0, 0)
+DEF2(call2_r2, 2, 2, 0, 0)
+DEF2(call3_r2, 2, 3, 0, 0)
+DEF2(call4_r2, 2, 4, 0, 0)
+
+#endif
+
+#endif
+
#undef DEF2
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 2a82f37..20aac38 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1690,6 +1690,142 @@ static void tcg_reg_alloc_op(TCGContext *s,
#define STACK_DIR(x) (x)
#endif
+#ifdef CONFIG_TCG_INTERPRETER
+
+static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
+ int opc, const TCGArg *args,
+ unsigned int dead_iargs)
+{
+ int nb_iargs, nb_oargs, flags, i, reg, nb_params;
+ TCGArg arg,func_arg;
+ TCGTemp *ts;
+ tcg_target_long func_addr;
+ TCGRegSet allocated_regs;
+ const TCGArgConstraint *arg_ct;
+ TCGArg new_args[TCG_MAX_OP_ARGS];
+ int const_args[TCG_MAX_OP_ARGS];
+
+ arg = *args++;
+
+ nb_oargs = arg >> 16;
+ nb_iargs = arg & 0xffff;
+ nb_params = nb_iargs - 1;
+
+ flags = args[nb_oargs + nb_iargs];
+
+ const_args[0]=1;
+ new_args[0]=(nb_oargs<<4)|nb_params;
+
+ /* satisfy input constraints */
+ tcg_regset_set(allocated_regs, s->reserved_regs);
+
+ for(i = nb_params; i >= 0; i--) {
+ arg = args[nb_oargs + i];
+ ts = &s->temps[arg];
+ if (ts->val_type == TEMP_VAL_MEM) {
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], allocated_regs);
+ tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ ts->mem_coherent = 1;
+ s->reg_to_temp[reg] = arg;
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ /* constant is OK for instruction */
+ const_args[nb_params+1-i] = 1;
+ new_args[nb_params+1-i] = ts->val;
+ goto iarg_end;
+ }
+ assert(ts->val_type == TEMP_VAL_REG);
+ reg = ts->reg;
+ if (tcg_regset_test_reg(tcg_target_available_regs[ts->type], reg)) {
+ /* nothing to do : the constraint is satisfied */
+ } else {
+ allocate_in_reg:
+ /* allocate a new register matching the constraint
+ and move the temporary register into it */
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], allocated_regs);
+ tcg_out_mov(s, reg, ts->reg);
+ }
+ new_args[nb_params+1-i] = reg;
+ const_args[nb_params+1-i] = 0;
+ tcg_regset_set_reg(allocated_regs, reg);
+ iarg_end: ;
+ }
+
+ /* mark dead temporaries and free the associated registers */
+ for(i = 0; i < nb_iargs; i++) {
+ arg = args[nb_oargs + i];
+ if (IS_DEAD_IARG(i)) {
+ ts = &s->temps[arg];
+ if (!ts->fixed_reg) {
+ if (ts->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ts->reg] = -1;
+ ts->val_type = TEMP_VAL_DEAD;
+ }
+ }
+ }
+
+ /* clobber call registers */
+ for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
+ if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
+ tcg_reg_free(s, reg);
+ }
+ }
+
+ /* store globals and free associated registers (we assume the insn
+ can modify any global. */
+ if (!(flags & TCG_CALL_CONST)) {
+ save_globals(s, allocated_regs);
+ }
+
+ /* satisfy the output constraints */
+ tcg_regset_set(allocated_regs, s->reserved_regs);
+ for(i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ ts = &s->temps[arg];
+
+ /* if fixed register, we try to use it */
+ reg = ts->reg;
+ if (ts->fixed_reg &&
+ tcg_regset_test_reg(tcg_target_available_regs[ts->type], reg)) {
+ goto oarg_end;
+ }
+ reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], allocated_regs);
+
+ tcg_regset_set_reg(allocated_regs, reg);
+ /* if a fixed register is used, then a move will be done afterwards */
+ if (!ts->fixed_reg) {
+ if (ts->val_type == TEMP_VAL_REG)
+ s->reg_to_temp[ts->reg] = -1;
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ /* temp value is modified, so the value kept in memory is
+ potentially not the same */
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = arg;
+ }
+oarg_end:
+ new_args[i+nb_params+2] = reg;
+ }
+
+
+ /* emit instruction */
+ tcg_out_op(s, opc, new_args, const_args);
+
+ /* move the outputs in the correct register if needed */
+ for(i = 0; i < nb_oargs; i++) {
+ ts = &s->temps[args[i]];
+ reg = new_args[i+nb_params+2];
+ if (ts->fixed_reg && ts->reg != reg) {
+ tcg_out_mov(s, ts->reg, reg);
+ }
+ }
+
+ return nb_iargs + nb_oargs + def->nb_cargs + 1;
+}
+
+#else
+
static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
int opc, const TCGArg *args,
unsigned int dead_iargs)
@@ -1868,6 +2004,9 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
return nb_iargs + nb_oargs + def->nb_cargs + 1;
}
+
+#endif
+
#ifdef CONFIG_PROFILER
static int64_t tcg_table_op_count[NB_OPS];
diff --git a/tcg/tci.c b/tcg/tci.c
index 0ba605b..3e4165b 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -41,8 +41,23 @@
#define TRACE() ((void)0)
#endif
-typedef tcg_target_ulong (*helper_function)(tcg_target_ulong, tcg_target_ulong,
+typedef tcg_target_ulong (*helper_function0)(void);
+typedef tcg_target_ulong (*helper_function1)(tcg_target_ulong);
+typedef tcg_target_ulong (*helper_function2)(tcg_target_ulong, tcg_target_ulong);
+typedef tcg_target_ulong (*helper_function3)(tcg_target_ulong, tcg_target_ulong,
+ tcg_target_ulong);
+typedef tcg_target_ulong (*helper_function4)(tcg_target_ulong, tcg_target_ulong,
tcg_target_ulong, tcg_target_ulong);
+#if TCG_TARGET_REG_BITS == 32
+
+typedef uint64_t (*helper_function0_r64)(void);
+typedef uint64_t (*helper_function1_r64)(tcg_target_ulong);
+typedef uint64_t (*helper_function2_r64)(tcg_target_ulong, tcg_target_ulong);
+typedef uint64_t (*helper_function3_r64)(tcg_target_ulong, tcg_target_ulong,
+ tcg_target_ulong);
+typedef uint64_t (*helper_function4_r64)(tcg_target_ulong, tcg_target_ulong,
+ tcg_target_ulong, tcg_target_ulong);
+#endif
#if defined(TARGET_I386)
struct CPUX86State *env;
@@ -427,15 +442,138 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
case INDEX_op_set_label:
TODO();
break;
- case INDEX_op_call:
- t0 = tci_read_ri(&tb_ptr);
- t0 = ((helper_function)t0)(tci_read_reg(TCG_REG_R0),
- tci_read_reg(TCG_REG_R1),
- tci_read_reg(TCG_REG_R2),
- tci_read_reg(TCG_REG_R3));
- // TODO: fix for 32 bit host / 64 bit target.
- tci_write_reg(TCG_REG_R0, t0);
+ case INDEX_op_call0_r0:
+ t0 = tci_read_i(&tb_ptr);
+ ((helper_function0)t0)();
+ break;
+ case INDEX_op_call1_r0:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ ((helper_function1)t0)(t1);
+ break;
+ case INDEX_op_call2_r0:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ t2 = tci_read_ri(&tb_ptr);
+ ((helper_function2)t0)(t1,t2);
+ break;
+ case INDEX_op_call3_r0:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ t2 = tci_read_ri(&tb_ptr);
+ t3 = tci_read_ri(&tb_ptr);
+ ((helper_function3)t0)(t1,t2,t3);
+ break;
+ case INDEX_op_call4_r0:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ t2 = tci_read_ri(&tb_ptr);
+ t3 = tci_read_ri(&tb_ptr);
+ t4 = tci_read_ri(&tb_ptr);
+ ((helper_function4)t0)(t1,t2,t3,t4);
+ break;
+ case INDEX_op_call0_r1:
+ t0 = tci_read_i(&tb_ptr);
+ t0 = ((helper_function0)t0)();
+ t5 = *tb_ptr++;
+ tci_write_reg(t5,t0);
+
+ break;
+ case INDEX_op_call1_r1:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ t0 = ((helper_function1)t0)(t1);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5,t0);
+
+ break;
+ case INDEX_op_call2_r1:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ t2 = tci_read_ri(&tb_ptr);
+ t0 = ((helper_function2)t0)(t1,t2);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5,t0);
+
+ break;
+ case INDEX_op_call3_r1:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ t2 = tci_read_ri(&tb_ptr);
+ t3 = tci_read_ri(&tb_ptr);
+ t0 = ((helper_function3)t0)(t1,t2,t3);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5,t0);
+
+ break;
+ case INDEX_op_call4_r1:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ t2 = tci_read_ri(&tb_ptr);
+ t3 = tci_read_ri(&tb_ptr);
+ t4 = tci_read_ri(&tb_ptr);
+ t0 = ((helper_function4)t0)(t1,t2,t3,t4);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5,t0);
+
break;
+#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_call0_r2:
+ t0 = tci_read_i(&tb_ptr);
+ u64 = ((helper_function0_r64)t0)();
+ t5 = *tb_ptr++;
+ tci_write_reg(t5,u64);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5, u64>>32);
+
+ break;
+ case INDEX_op_call1_r2:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ u64 = ((helper_function1_r64)t0)(t1);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5,u64);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5, u64>>32);
+
+ break;
+ case INDEX_op_call2_r2:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ t2 = tci_read_ri(&tb_ptr);
+ u64 = ((helper_function2_r64)t0)(t1,t2);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5,u64);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5, u64>>32);
+
+ break;
+ case INDEX_op_call3_r2:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ t2 = tci_read_ri(&tb_ptr);
+ t3 = tci_read_ri(&tb_ptr);
+ u64 = ((helper_function3_r64)t0)(t1,t2,t3);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5,u64);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5, u64>>32);
+
+ break;
+ case INDEX_op_call4_r2:
+ t0 = tci_read_i(&tb_ptr);
+ t1 = tci_read_ri(&tb_ptr);
+ t2 = tci_read_ri(&tb_ptr);
+ t3 = tci_read_ri(&tb_ptr);
+ t4 = tci_read_ri(&tb_ptr);
+ u64 = ((helper_function4_r64)t0)(t1,t2,t3,t4);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5,u64);
+ t5 = *tb_ptr++;
+ tci_write_reg(t5, u64>>32);
+
+ break;
+#endif
case INDEX_op_jmp:
case INDEX_op_br:
t0 = *(uint64_t *)tb_ptr;
--
1.6.3.msysgit.0
[-- Attachment #6: 0005-tci-speed-optimization.patch --]
[-- Type: text/plain, Size: 8365 bytes --]
Subject: [PATCH 5/5] tci: speed optimization
---
tcg/tci.c | 66 ++++++++++++++++++++++++++++++------------------------------
1 files changed, 33 insertions(+), 33 deletions(-)
diff --git a/tcg/tci.c b/tcg/tci.c
index 3e4165b..8628e69 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -71,88 +71,88 @@ uint8_t * tci_tb_ptr;
static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS];
-static tcg_target_ulong tci_read_reg(uint32_t index)
+static inline tcg_target_ulong tci_read_reg(uint32_t index)
{
assert(index < ARRAY_SIZE(tci_reg));
return tci_reg[index];
}
-static uint8_t tci_read_reg8(uint32_t index)
+static inline uint8_t tci_read_reg8(uint32_t index)
{
return (uint8_t)tci_read_reg(index);
}
-static int8_t tci_read_reg8s(uint32_t index)
+static inline int8_t tci_read_reg8s(uint32_t index)
{
return (int8_t)tci_read_reg(index);
}
-static uint16_t tci_read_reg16(uint32_t index)
+static inline uint16_t tci_read_reg16(uint32_t index)
{
return (uint16_t)tci_read_reg(index);
}
-static int16_t tci_read_reg16s(uint32_t index)
+static inline int16_t tci_read_reg16s(uint32_t index)
{
return (int16_t)tci_read_reg(index);
}
-static uint32_t tci_read_reg32(uint32_t index)
+static inline uint32_t tci_read_reg32(uint32_t index)
{
return (uint32_t)tci_read_reg(index);
}
#if TCG_TARGET_REG_BITS == 64
-static int32_t tci_read_reg32s(uint32_t index)
+static inline int32_t tci_read_reg32s(uint32_t index)
{
return (int32_t)tci_read_reg(index);
}
-static uint64_t tci_read_reg64(uint32_t index)
+static inline uint64_t tci_read_reg64(uint32_t index)
{
return tci_read_reg(index);
}
#endif
-static void tci_write_reg(uint32_t index, tcg_target_ulong value)
+static inline void tci_write_reg(uint32_t index, tcg_target_ulong value)
{
assert(index < ARRAY_SIZE(tci_reg));
assert(index != TCG_AREG0);
tci_reg[index] = value;
}
-static void tci_write_reg8(uint32_t index, uint8_t value)
+static inline void tci_write_reg8(uint32_t index, uint8_t value)
{
tci_write_reg(index, value);
}
-static void tci_write_reg8s(uint32_t index, int8_t value)
+static inline void tci_write_reg8s(uint32_t index, int8_t value)
{
tci_write_reg(index, value);
}
-static void tci_write_reg16s(uint32_t index, int16_t value)
+static inline void tci_write_reg16s(uint32_t index, int16_t value)
{
tci_write_reg(index, value);
}
-static void tci_write_reg16(uint32_t index, uint16_t value)
+static inline void tci_write_reg16(uint32_t index, uint16_t value)
{
tci_write_reg(index, value);
}
-static void tci_write_reg32(uint32_t index, uint32_t value)
+static inline void tci_write_reg32(uint32_t index, uint32_t value)
{
tci_write_reg(index, value);
}
-static void tci_write_reg32s(uint32_t index, int32_t value)
+static inline void tci_write_reg32s(uint32_t index, int32_t value)
{
tci_write_reg(index, value);
}
#if TCG_TARGET_REG_BITS == 64
-static void tci_write_reg64(uint32_t index, uint64_t value)
+static inline void tci_write_reg64(uint32_t index, uint64_t value)
{
tci_write_reg(index, value);
}
@@ -160,14 +160,14 @@ static void tci_write_reg64(uint32_t index, uint64_t value)
#if TCG_TARGET_REG_BITS == 32
/* Create a 64 bit value from two 32 bit values. */
-static uint64_t tci_uint64(uint32_t high, uint32_t low)
+static inline uint64_t tci_uint64(uint32_t high, uint32_t low)
{
return ((uint64_t)high << 32) + low;
}
#endif
/* Read constant (native size) from bytecode. */
-static tcg_target_ulong tci_read_i(uint8_t **tb_ptr)
+static inline tcg_target_ulong tci_read_i(uint8_t **tb_ptr)
{
tcg_target_ulong value = *(tcg_target_ulong *)(*tb_ptr);
*tb_ptr += sizeof(tcg_target_ulong);
@@ -175,7 +175,7 @@ static tcg_target_ulong tci_read_i(uint8_t **tb_ptr)
}
/* Read constant (32 bit) from bytecode. */
-static uint32_t tci_read_i32(uint8_t **tb_ptr)
+static inline uint32_t tci_read_i32(uint8_t **tb_ptr)
{
uint32_t value = *(uint32_t *)(*tb_ptr);
*tb_ptr += 4;
@@ -184,7 +184,7 @@ static uint32_t tci_read_i32(uint8_t **tb_ptr)
#if TCG_TARGET_REG_BITS == 64
/* Read constant (64 bit) from bytecode. */
-static uint64_t tci_read_i64(uint8_t **tb_ptr)
+static inline uint64_t tci_read_i64(uint8_t **tb_ptr)
{
uint64_t value = *(uint64_t *)(*tb_ptr);
*tb_ptr += 8;
@@ -193,7 +193,7 @@ static uint64_t tci_read_i64(uint8_t **tb_ptr)
#endif
/* Read indexed register (native size) from bytecode. */
-static tcg_target_ulong tci_read_r(uint8_t **tb_ptr)
+static inline tcg_target_ulong tci_read_r(uint8_t **tb_ptr)
{
tcg_target_ulong value = tci_read_reg(**tb_ptr);
*tb_ptr += 1;
@@ -201,7 +201,7 @@ static tcg_target_ulong tci_read_r(uint8_t **tb_ptr)
}
/* Read indexed register (8 bit) from bytecode. */
-static uint8_t tci_read_r8(uint8_t **tb_ptr)
+static inline uint8_t tci_read_r8(uint8_t **tb_ptr)
{
uint8_t value = tci_read_reg8(**tb_ptr);
*tb_ptr += 1;
@@ -209,7 +209,7 @@ static uint8_t tci_read_r8(uint8_t **tb_ptr)
}
/* Read indexed register (8 bit signed) from bytecode. */
-static int8_t tci_read_r8s(uint8_t **tb_ptr)
+static inline int8_t tci_read_r8s(uint8_t **tb_ptr)
{
int8_t value = tci_read_reg8s(**tb_ptr);
*tb_ptr += 1;
@@ -217,7 +217,7 @@ static int8_t tci_read_r8s(uint8_t **tb_ptr)
}
/* Read indexed register (16 bit) from bytecode. */
-static uint16_t tci_read_r16(uint8_t **tb_ptr)
+static inline uint16_t tci_read_r16(uint8_t **tb_ptr)
{
uint16_t value = tci_read_reg16(**tb_ptr);
*tb_ptr += 1;
@@ -225,7 +225,7 @@ static uint16_t tci_read_r16(uint8_t **tb_ptr)
}
/* Read indexed register (16 bit signed) from bytecode. */
-static int16_t tci_read_r16s(uint8_t **tb_ptr)
+static inline int16_t tci_read_r16s(uint8_t **tb_ptr)
{
uint16_t value = tci_read_reg16s(**tb_ptr);
*tb_ptr += 1;
@@ -233,7 +233,7 @@ static int16_t tci_read_r16s(uint8_t **tb_ptr)
}
/* Read indexed register (32 bit) from bytecode. */
-static uint32_t tci_read_r32(uint8_t **tb_ptr)
+static inline uint32_t tci_read_r32(uint8_t **tb_ptr)
{
uint32_t value = tci_read_reg32(**tb_ptr);
*tb_ptr += 1;
@@ -242,7 +242,7 @@ static uint32_t tci_read_r32(uint8_t **tb_ptr)
#if TCG_TARGET_REG_BITS == 64
/* Read indexed register (32 bit signed) from bytecode. */
-static int32_t tci_read_r32s(uint8_t **tb_ptr)
+static inline int32_t tci_read_r32s(uint8_t **tb_ptr)
{
int32_t value = tci_read_reg32s(**tb_ptr);
*tb_ptr += 1;
@@ -250,7 +250,7 @@ static int32_t tci_read_r32s(uint8_t **tb_ptr)
}
/* Read indexed register (64 bit) from bytecode. */
-static uint64_t tci_read_r64(uint8_t **tb_ptr)
+static inline uint64_t tci_read_r64(uint8_t **tb_ptr)
{
uint64_t value = tci_read_reg64(**tb_ptr);
*tb_ptr += 1;
@@ -259,7 +259,7 @@ static uint64_t tci_read_r64(uint8_t **tb_ptr)
#endif
/* Read indexed register or constant (native size) from bytecode. */
-static tcg_target_ulong tci_read_ri(uint8_t **tb_ptr)
+static inline tcg_target_ulong tci_read_ri(uint8_t **tb_ptr)
{
bool const_arg;
tcg_target_ulong value;
@@ -274,7 +274,7 @@ static tcg_target_ulong tci_read_ri(uint8_t **tb_ptr)
}
/* Read indexed register or constant (32 bit) from bytecode. */
-static uint32_t tci_read_ri32(uint8_t **tb_ptr)
+static inline uint32_t tci_read_ri32(uint8_t **tb_ptr)
{
bool const_arg;
uint32_t value;
@@ -290,7 +290,7 @@ static uint32_t tci_read_ri32(uint8_t **tb_ptr)
#if TCG_TARGET_REG_BITS == 64
/* Read indexed register or constant (64 bit) from bytecode. */
-static uint64_t tci_read_ri64(uint8_t **tb_ptr)
+static inline uint64_t tci_read_ri64(uint8_t **tb_ptr)
{
bool const_arg;
uint64_t value;
@@ -305,7 +305,7 @@ static uint64_t tci_read_ri64(uint8_t **tb_ptr)
}
#endif
-static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
+static inline bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
{
bool result = false;
int32_t i0 = u0;
@@ -347,7 +347,7 @@ static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
return result;
}
-static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
+static inline bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
{
bool result = false;
int64_t i0 = u0;
--
1.6.3.msysgit.0
next reply other threads:[~2009-10-23 1:31 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-23 1:31 TeLeMan [this message]
2009-10-23 18:58 ` [Qemu-devel] Re: [PATCH] tcg, tci: Add TCG and interpreter for bytecode (virtual machine) Stefan Weil
2009-10-24 3:23 ` TeLeMan
2009-10-26 19:08 ` Stuart Brady
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a38b25540910221831u1f43a337s8863b93930f275e9@mail.gmail.com \
--to=geleman@gmail.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).