Subject: [PATCH 4/5] tci: new op_call implementation for tci

---
 tcg/bytecode/tcg-target.c | 191 ++++++++++++++++++++++++++++++++++++++++++++-
 tcg/tcg-opc.h | 25 ++++++
 tcg/tcg.c | 139 ++++++++++++++++++++++++++++++++
 tcg/tci.c | 156 ++++++++++++++++++++++++++++++++++--
 4 files changed, 500 insertions(+), 11 deletions(-)

/*
 * Replaces the single INDEX_op_call bytecode with a family callN_rM
 * (N = 0..4 helper parameters, M = 0..2 result registers; M = 2 only when
 * TCG_TARGET_REG_BITS == 32).  Each bytecode carries the helper address as
 * an immediate, then the N parameters, then the M result register indexes.
 *
 * NOTE(review): in this copy the first tcg-target.c hunk appears to carry
 * 23 '+' lines against a header that implies 22, and the second tci.c hunk
 * 130 against 131 -- regenerate the patch from the tree before applying.
 */
diff --git a/tcg/bytecode/tcg-target.c b/tcg/bytecode/tcg-target.c
index aae570f..744b9e6 100644
--- a/tcg/bytecode/tcg-target.c
+++ b/tcg/bytecode/tcg-target.c
@@ -248,6 +248,28 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
     { INDEX_op_bswap32_i32, { "r", "r" } },
 #endif
 
+    { INDEX_op_call0_r0, { "i"} }, /* constraint order: helper address, parameters, result registers */
+    { INDEX_op_call1_r0, { "i","ri"} },
+    { INDEX_op_call2_r0, { "i","ri","ri"} },
+    { INDEX_op_call3_r0, { "i","ri","ri","ri"} },
+    { INDEX_op_call4_r0, { "i","ri","ri","ri","ri"} },
+
+    { INDEX_op_call0_r1, { "i","r"} }, /* _r1: one trailing result register */
+    { INDEX_op_call1_r1, { "i","ri","r"} },
+    { INDEX_op_call2_r1, { "i","ri","ri","r"} },
+    { INDEX_op_call3_r1, { "i","ri","ri","ri","r"} },
+    { INDEX_op_call4_r1, { "i","ri","ri","ri","ri","r"} },
+
+#if TCG_TARGET_REG_BITS == 32
+
+    { INDEX_op_call0_r2, { "i","r","r"} }, /* _r2: two trailing result registers */
+    { INDEX_op_call1_r2, { "i","ri","r","r"} },
+    { INDEX_op_call2_r2, { "i","ri","ri","r","r"} },
+    { INDEX_op_call3_r2, { "i","ri","ri","ri","r","r"} },
+    { INDEX_op_call4_r2, { "i","ri","ri","ri","ri","r","r"} },
+
+#endif
+
+
     { -1 },
 };
 
@@ -655,6 +677,172 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
     }
 }
 
+static void tcg_out_op_call(TCGContext *s, const TCGArg *args, /* emits one callN_rM bytecode from the args[] built by tcg_reg_alloc_call */
+                       const int *const_args)
+{
+    int nb_iargs=args[0]&0x0F; /* low nibble of args[0]: number of helper parameters */
+    int nb_oargs=args[0]>>4; /* remaining bits of args[0]: number of result registers */
+
+    assert(const_args[1]!=0); /* args[1], the helper address, has to be a constant */
+
+    switch(nb_iargs) /* one opcode per (parameter count, result count) pair */
+    {
+    case 0:
+        switch(nb_oargs)
+        {
+        case 0:
+            tcg_out_op_t(s, INDEX_op_call0_r0);
+            tcg_out_i(s, args[1]);/*func*/
+            break;
+        case 1:
+            tcg_out_op_t(s, INDEX_op_call0_r1);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_r(s, args[2]);/*r1*/
+            break;
+#if TCG_TARGET_REG_BITS == 32
+        case 2:
+            tcg_out_op_t(s, INDEX_op_call0_r2);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_r(s, args[2]);/*r1*/
+            tcg_out_r(s, args[3]);/*r2*/
+            break;
+#endif
+        default:
+            TODO();
+        }
+        break;
+    case 1:
+        switch(nb_oargs)
+        {
+        case 0:
+            tcg_out_op_t(s, INDEX_op_call1_r0);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg1*/
+            break;
+        case 1:
+            tcg_out_op_t(s, INDEX_op_call1_r1);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg1*/
+            tcg_out_r(s, args[3]);/*r1*/
+            break;
+#if TCG_TARGET_REG_BITS == 32
+        case 2:
+            tcg_out_op_t(s, INDEX_op_call1_r2);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg1*/
+            tcg_out_r(s, args[3]);/*r1*/
+            tcg_out_r(s, args[4]);/*r2*/
+            break;
+#endif
+        default:
+            TODO();
+        }
+        break;
+    case 2: /* from here on: parameters are emitted from the highest args[] index down to args[2] */
+        switch(nb_oargs)
+        {
+        case 0:
+            tcg_out_op_t(s, INDEX_op_call2_r0);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[3], args[3]);/*arg1*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg2*/
+            break;
+        case 1:
+            tcg_out_op_t(s, INDEX_op_call2_r1);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[3], args[3]);/*arg1*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg2*/
+            tcg_out_r(s, args[4]);/*r1*/
+            break;
+#if TCG_TARGET_REG_BITS == 32
+        case 2:
+            tcg_out_op_t(s, INDEX_op_call2_r2);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[3], args[3]);/*arg1*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg2*/
+            tcg_out_r(s, args[4]);/*r1*/
+            tcg_out_r(s, args[5]);/*r2*/
+            break;
+#endif
+        default:
+            TODO();
+        }
+        break;
+    case 3:
+        switch(nb_oargs)
+        {
+        case 0:
+            tcg_out_op_t(s, INDEX_op_call3_r0);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[4], args[4]);/*arg1*/
+            tcg_out_ri(s, const_args[3], args[3]);/*arg2*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg3*/
+            break;
+        case 1:
+            tcg_out_op_t(s, INDEX_op_call3_r1);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[4], args[4]);/*arg1*/
+            tcg_out_ri(s, const_args[3], args[3]);/*arg2*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg3*/
+            tcg_out_r(s, args[5]);/*r1*/
+            break;
+#if TCG_TARGET_REG_BITS == 32
+        case 2:
+            tcg_out_op_t(s, INDEX_op_call3_r2);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[4], args[4]);/*arg1*/
+            tcg_out_ri(s, const_args[3], args[3]);/*arg2*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg3*/
+            tcg_out_r(s, args[5]);/*r1*/
+            tcg_out_r(s, args[6]);/*r2*/
+            break;
+#endif
+        default:
+            TODO();
+        }
+        break;
+    case 4:
+        switch(nb_oargs)
+        {
+        case 0:
+            tcg_out_op_t(s, INDEX_op_call4_r0);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[5], args[5]);/*arg1*/
+            tcg_out_ri(s, const_args[4], args[4]);/*arg2*/
+            tcg_out_ri(s, const_args[3], args[3]);/*arg3*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg4*/
+            break;
+        case 1:
+            tcg_out_op_t(s, INDEX_op_call4_r1);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[5], args[5]);/*arg1*/
+            tcg_out_ri(s, const_args[4], args[4]);/*arg2*/
+            tcg_out_ri(s, const_args[3], args[3]);/*arg3*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg4*/
+            tcg_out_r(s, args[6]);/*r1*/
+            break;
+#if TCG_TARGET_REG_BITS == 32
+        case 2:
+            tcg_out_op_t(s, INDEX_op_call4_r2);
+            tcg_out_i(s, args[1]);/*func*/
+            tcg_out_ri(s, const_args[5], args[5]);/*arg1*/
+            tcg_out_ri(s, const_args[4], args[4]);/*arg2*/
+            tcg_out_ri(s, const_args[3], args[3]);/*arg3*/
+            tcg_out_ri(s, const_args[2], args[2]);/*arg4*/
+            tcg_out_r(s, args[6]);/*r1*/
+            tcg_out_r(s, args[7]);/*r2*/
+            break;
+#endif
+        default:
+            TODO();
+        }
+        break;
+    default:
+        TODO();
+    }
+}
+
+
 static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
                        const int *const_args)
 {
@@ -683,8 +871,7 @@ static void tcg_out_op(TCGContext *s, int opc, const TCGArg *args,
         tci_out_label(s, args[0]);
         break;
     case INDEX_op_call:
-        tcg_out_op_t(s, opc);
-        tcg_out_ri(s, const_args[0], args[0]);
+        tcg_out_op_call(s,args,const_args); /* picks the callN_rM opcode from the counts packed in args[0] */
         break;
     case INDEX_op_jmp:
         TODO();
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index b7f3fd7..070ba39 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -269,4 +269,29 @@ DEF2(qemu_st64, 0, 2, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS)
 
 #endif /* TCG_TARGET_REG_BITS != 32 */
 
+#ifdef CONFIG_TCG_INTERPRETER /* callN_rM: interpreter-only call opcodes, N parameters and M result registers */
+
+DEF2(call0_r0, 0, 0, 0, 0)
+DEF2(call1_r0, 0, 1, 0, 0)
+DEF2(call2_r0, 0, 2, 0, 0)
+DEF2(call3_r0, 0, 3, 0, 0)
+DEF2(call4_r0, 0, 4, 0, 0)
+DEF2(call0_r1, 1, 0, 0, 0)
+DEF2(call1_r1, 1, 1, 0, 0)
+DEF2(call2_r1, 1, 2, 0, 0)
+DEF2(call3_r1, 1, 3, 0, 0)
+DEF2(call4_r1, 1, 4, 0, 0)
+
+#if TCG_TARGET_REG_BITS == 32 /* two-register results exist only here, matching tcg-target.c and tci.c */
+
+DEF2(call0_r2, 2, 0, 0, 0)
+DEF2(call1_r2, 2, 1, 0, 0)
+DEF2(call2_r2, 2, 2, 0, 0)
+DEF2(call3_r2, 2, 3, 0, 0)
+DEF2(call4_r2, 2, 4, 0, 0)
+
+#endif
+
+#endif
+
 #undef DEF2
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 2a82f37..20aac38 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1690,6 +1690,142 @@ static void tcg_reg_alloc_op(TCGContext *s,
 #define STACK_DIR(x) (x)
 #endif
 
+#ifdef CONFIG_TCG_INTERPRETER /* interpreter build: call operands are handed to tcg_out_op as an ordinary operand list */
+
+static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
+                              int opc, const TCGArg *args,
+                              unsigned int dead_iargs)
+{
+    int nb_iargs, nb_oargs, flags, i, reg, nb_params;
+    TCGArg arg,func_arg; /* NOTE(review): func_arg is never used in this function */
+    TCGTemp *ts;
+    tcg_target_long func_addr; /* NOTE(review): never used in this function */
+    TCGRegSet allocated_regs;
+    const TCGArgConstraint *arg_ct; /* NOTE(review): never used in this function */
+    TCGArg new_args[TCG_MAX_OP_ARGS]; /* operand list passed to tcg_out_op: [0] counts, [1] helper, parameters, results */
+    int const_args[TCG_MAX_OP_ARGS]; /* per-operand flag: 1 when new_args[] holds a constant rather than a register */
+
+    arg = *args++; /* leading word packs both counts; args now points at the first output */
+
+    nb_oargs = arg >> 16; /* output count: bits 16 and above */
+    nb_iargs = arg & 0xffff; /* input count: low 16 bits */
+    nb_params = nb_iargs - 1; /* the input at index nb_params is the helper address, the rest are parameters */
+
+    flags = args[nb_oargs + nb_iargs]; /* call flags sit right after the outputs and inputs */
+
+    const_args[0]=1;
+    new_args[0]=(nb_oargs<<4)|nb_params; /* counts in the form tcg_out_op_call decodes (nibble / upper bits) */
+
+    /* satisfy input constraints */
+    tcg_regset_set(allocated_regs, s->reserved_regs);
+
+    for(i = nb_params; i >= 0; i--) { /* last input (the helper) lands in new_args[1]; parameters follow in reverse order */
+        arg = args[nb_oargs + i];
+        ts = &s->temps[arg];
+        if (ts->val_type == TEMP_VAL_MEM) { /* value lives in memory: load it into a free register first */
+            reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], allocated_regs);
+            tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+            ts->val_type = TEMP_VAL_REG;
+            ts->reg = reg;
+            ts->mem_coherent = 1;
+            s->reg_to_temp[reg] = arg;
+        } else if (ts->val_type == TEMP_VAL_CONST) {
+            /* constant is OK for instruction: pass the value itself and flag it in const_args */
+            const_args[nb_params+1-i] = 1;
+            new_args[nb_params+1-i] = ts->val;
+            goto iarg_end;
+        }
+        assert(ts->val_type == TEMP_VAL_REG);
+        reg = ts->reg;
+        if (tcg_regset_test_reg(tcg_target_available_regs[ts->type], reg)) {
+            /* nothing to do : the constraint is satisfied */
+        } else {
+        allocate_in_reg: /* NOTE(review): no goto in this function targets this label */
+            /* allocate a new register matching the constraint
+               and move the temporary register into it */
+            reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], allocated_regs);
+            tcg_out_mov(s, reg, ts->reg);
+        }
+        new_args[nb_params+1-i] = reg;
+        const_args[nb_params+1-i] = 0;
+        tcg_regset_set_reg(allocated_regs, reg); /* keep later inputs from being allocated the same register */
+    iarg_end: ;
+    }
+
+    /* mark dead temporaries and free the associated registers */
+    for(i = 0; i < nb_iargs; i++) {
+        arg = args[nb_oargs + i];
+        if (IS_DEAD_IARG(i)) {
+            ts = &s->temps[arg];
+            if (!ts->fixed_reg) {
+                if (ts->val_type == TEMP_VAL_REG)
+                    s->reg_to_temp[ts->reg] = -1;
+                ts->val_type = TEMP_VAL_DEAD;
+            }
+        }
+    }
+
+    /* clobber call registers */
+    for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
+        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
+            tcg_reg_free(s, reg);
+        }
+    }
+
+    /* store globals and free associated registers (we assume the insn
+       can modify any global). */
+    if (!(flags & TCG_CALL_CONST)) { /* skipped when the call is flagged TCG_CALL_CONST */
+        save_globals(s, allocated_regs);
+    }
+
+    /* satisfy the output constraints */
+    tcg_regset_set(allocated_regs, s->reserved_regs);
+    for(i = 0; i < nb_oargs; i++) {
+        arg = args[i];
+        ts = &s->temps[arg];
+
+        /* if fixed register, we try to use it */
+        reg = ts->reg;
+        if (ts->fixed_reg &&
+            tcg_regset_test_reg(tcg_target_available_regs[ts->type], reg)) {
+            goto oarg_end;
+        }
+        reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], allocated_regs);
+
+        tcg_regset_set_reg(allocated_regs, reg);
+        /* if a fixed register is used, then a move will be done afterwards */
+        if (!ts->fixed_reg) {
+            if (ts->val_type == TEMP_VAL_REG)
+                s->reg_to_temp[ts->reg] = -1;
+            ts->val_type = TEMP_VAL_REG;
+            ts->reg = reg;
+            /* temp value is modified, so the value kept in memory is
+               potentially not the same */
+            ts->mem_coherent = 0;
+            s->reg_to_temp[reg] = arg;
+        }
+oarg_end:
+        new_args[i+nb_params+2] = reg; /* result registers follow the helper and its parameters */
+    }
+
+
+    /* emit instruction */
+    tcg_out_op(s, opc, new_args, const_args);
+
+    /* move the outputs into the correct register if needed */
+    for(i = 0; i < nb_oargs; i++) {
+        ts = &s->temps[args[i]];
+        reg = new_args[i+nb_params+2];
+        if (ts->fixed_reg && ts->reg != reg) {
+            tcg_out_mov(s, ts->reg, reg);
+        }
+    }
+
+    return nb_iargs + nb_oargs + def->nb_cargs + 1;
+}
+
+#else
+
 static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
                               int opc, const TCGArg *args,
                               unsigned int dead_iargs)
@@ -1868,6 +2004,9 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
     return nb_iargs + nb_oargs + def->nb_cargs + 1;
 }
 
+
+#endif
+
 #ifdef CONFIG_PROFILER
 
 static int64_t tcg_table_op_count[NB_OPS];
diff --git a/tcg/tci.c b/tcg/tci.c
index 0ba605b..3e4165b 100644
--- a/tcg/tci.c
+++ b/tcg/tci.c
@@ -41,8 +41,23 @@
 #define TRACE() ((void)0)
 #endif
 
-typedef tcg_target_ulong (*helper_function)(tcg_target_ulong, tcg_target_ulong,
+typedef tcg_target_ulong (*helper_function0)(void); /* helper_functionN: helper taking N word-sized parameters */
+typedef tcg_target_ulong (*helper_function1)(tcg_target_ulong);
+typedef tcg_target_ulong (*helper_function2)(tcg_target_ulong, tcg_target_ulong);
+typedef tcg_target_ulong (*helper_function3)(tcg_target_ulong, tcg_target_ulong,
+                                             tcg_target_ulong);
+typedef tcg_target_ulong (*helper_function4)(tcg_target_ulong, tcg_target_ulong,
                                             tcg_target_ulong, tcg_target_ulong);
+#if TCG_TARGET_REG_BITS == 32 /* _r64 variants: same parameters, uint64_t return value */
+
+typedef uint64_t (*helper_function0_r64)(void);
+typedef uint64_t (*helper_function1_r64)(tcg_target_ulong);
+typedef uint64_t (*helper_function2_r64)(tcg_target_ulong, tcg_target_ulong);
+typedef uint64_t (*helper_function3_r64)(tcg_target_ulong, tcg_target_ulong,
+                                             tcg_target_ulong);
+typedef uint64_t (*helper_function4_r64)(tcg_target_ulong, tcg_target_ulong,
+                                             tcg_target_ulong, tcg_target_ulong);
+#endif
 
 #if defined(TARGET_I386)
 struct CPUX86State *env;
@@ -427,15 +442,138 @@ unsigned long tcg_qemu_tb_exec(uint8_t *tb_ptr)
         case INDEX_op_set_label:
             TODO();
             break;
-        case INDEX_op_call:
-            t0 = tci_read_ri(&tb_ptr);
-            t0 = ((helper_function)t0)(tci_read_reg(TCG_REG_R0),
-                                       tci_read_reg(TCG_REG_R1),
-                                       tci_read_reg(TCG_REG_R2),
-                                       tci_read_reg(TCG_REG_R3));
-            // TODO: fix for 32 bit host / 64 bit target.
-            tci_write_reg(TCG_REG_R0, t0);
+        case INDEX_op_call0_r0: /* callN_r0: read helper address and N operands, call, discard the result */
+            t0 = tci_read_i(&tb_ptr);
+            ((helper_function0)t0)();
+            break;
+        case INDEX_op_call1_r0:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            ((helper_function1)t0)(t1);
+            break;
+        case INDEX_op_call2_r0:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            t2 = tci_read_ri(&tb_ptr);
+            ((helper_function2)t0)(t1,t2);
+            break;
+        case INDEX_op_call3_r0:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            t2 = tci_read_ri(&tb_ptr);
+            t3 = tci_read_ri(&tb_ptr);
+            ((helper_function3)t0)(t1,t2,t3);
+            break;
+        case INDEX_op_call4_r0:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            t2 = tci_read_ri(&tb_ptr);
+            t3 = tci_read_ri(&tb_ptr);
+            t4 = tci_read_ri(&tb_ptr);
+            ((helper_function4)t0)(t1,t2,t3,t4);
+            break;
+        case INDEX_op_call0_r1: /* callN_r1: as above, then store the result in the register named by the next byte */
+            t0 = tci_read_i(&tb_ptr);
+            t0 = ((helper_function0)t0)();
+            t5 = *tb_ptr++; /* one byte: destination register index, read after the call returns */
+            tci_write_reg(t5,t0);
+
+            break;
+        case INDEX_op_call1_r1:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            t0 = ((helper_function1)t0)(t1);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5,t0);
+
+            break;
+        case INDEX_op_call2_r1:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            t2 = tci_read_ri(&tb_ptr);
+            t0 = ((helper_function2)t0)(t1,t2);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5,t0);
+
+            break;
+        case INDEX_op_call3_r1:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            t2 = tci_read_ri(&tb_ptr);
+            t3 = tci_read_ri(&tb_ptr);
+            t0 = ((helper_function3)t0)(t1,t2,t3);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5,t0);
+
+            break;
+        case INDEX_op_call4_r1:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            t2 = tci_read_ri(&tb_ptr);
+            t3 = tci_read_ri(&tb_ptr);
+            t4 = tci_read_ri(&tb_ptr);
+            t0 = ((helper_function4)t0)(t1,t2,t3,t4);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5,t0);
             break;
+#if TCG_TARGET_REG_BITS == 32
+        case INDEX_op_call0_r2: /* callN_r2: 64-bit result written to two registers named by the next two bytes */
+            t0 = tci_read_i(&tb_ptr);
+            u64 = ((helper_function0_r64)t0)();
+            t5 = *tb_ptr++;
+            tci_write_reg(t5,u64); /* presumably keeps only the low word -- confirm in tci_write_reg */
+            t5 = *tb_ptr++;
+            tci_write_reg(t5, u64>>32); /* second register receives bits 63..32 */
+
+            break;
+        case INDEX_op_call1_r2:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            u64 = ((helper_function1_r64)t0)(t1);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5,u64);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5, u64>>32);
+
+            break;
+        case INDEX_op_call2_r2:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            t2 = tci_read_ri(&tb_ptr);
+            u64 = ((helper_function2_r64)t0)(t1,t2);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5,u64);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5, u64>>32);
+
+            break;
+        case INDEX_op_call3_r2:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            t2 = tci_read_ri(&tb_ptr);
+            t3 = tci_read_ri(&tb_ptr);
+            u64 = ((helper_function3_r64)t0)(t1,t2,t3);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5,u64);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5, u64>>32);
+
+            break;
+        case INDEX_op_call4_r2:
+            t0 = tci_read_i(&tb_ptr);
+            t1 = tci_read_ri(&tb_ptr);
+            t2 = tci_read_ri(&tb_ptr);
+            t3 = tci_read_ri(&tb_ptr);
+            t4 = tci_read_ri(&tb_ptr);
+            u64 = ((helper_function4_r64)t0)(t1,t2,t3,t4);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5,u64);
+            t5 = *tb_ptr++;
+            tci_write_reg(t5, u64>>32);
+
+            break;
+#endif
         case INDEX_op_jmp:
         case INDEX_op_br:
             t0 = *(uint64_t *)tb_ptr;
-- 
1.6.3.msysgit.0