From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1KZWF7-000442-4c for qemu-devel@nongnu.org; Sat, 30 Aug 2008 15:36:17 -0400 Received: from exim by lists.gnu.org with spam-scanned (Exim 4.43) id 1KZWF6-00043Z-HT for qemu-devel@nongnu.org; Sat, 30 Aug 2008 15:36:16 -0400 Received: from [199.232.76.173] (port=47021 helo=monty-python.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1KZWF6-00043S-5T for qemu-devel@nongnu.org; Sat, 30 Aug 2008 15:36:16 -0400 Received: from vsmtp04.dti.ne.jp ([202.216.231.139]:34532) by monty-python.gnu.org with esmtp (Exim 4.60) (envelope-from ) id 1KZWF3-0003X7-Bs for qemu-devel@nongnu.org; Sat, 30 Aug 2008 15:36:15 -0400 Received: from [192.168.1.21] (PPPa35.e11.eacc.dti.ne.jp [124.255.86.36]) by vsmtp04.dti.ne.jp (3.11v) with ESMTP AUTH id m7UJa8om002083 for ; Sun, 31 Aug 2008 04:36:08 +0900 (JST) Message-ID: <48B9A131.7080400@juno.dti.ne.jp> Date: Sun, 31 Aug 2008 04:36:17 +0900 From: Shin-ichiro KAWASAKI MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-2022-JP Content-Transfer-Encoding: 7bit Subject: [Qemu-devel] [PATCH] SH4: convert fmov/fadd to TCG Reply-To: qemu-devel@nongnu.org List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org This patch converts two SH4 float instructions, 'fmov Rm,Rn' and 'fadd', to TCG. Comments on it would be appreciated before the other float instructions are converted to TCG. - TCG variables are introduced for float operations: FT[01] and DT[01]. - I think the float registers 'fregs' should not be mapped to TCG variables, because TCG does not currently support float operations. Instead, float register load/store functions are introduced. For 64-bit operations, they swap the two 32-bit halves using temporary TCG variables. I hope that this won't result in too much overhead. - A comment is added to note that the 64-bit 'fmov' path is untested, because SH-Linux does not seem to set the FPSCR_SZ flag.
Regards, Shin-ichiro KAWASAKI Index: trunk/target-sh4/op.c =================================================================== --- trunk/target-sh4/op.c (revision 5116) +++ trunk/target-sh4/op.c (working copy) @@ -230,18 +230,6 @@ RETURN(); } -void OPPROTO op_fadd_FT(void) -{ - FT0 = float32_add(FT0, FT1, &env->fp_status); - RETURN(); -} - -void OPPROTO op_fadd_DT(void) -{ - DT0 = float64_add(DT0, DT1, &env->fp_status); - RETURN(); -} - void OPPROTO op_fsub_FT(void) { FT0 = float32_sub(FT0, FT1, &env->fp_status); Index: trunk/target-sh4/helper.h =================================================================== --- trunk/target-sh4/helper.h (revision 5116) +++ trunk/target-sh4/helper.h (working copy) @@ -16,3 +16,6 @@ DEF_HELPER(uint32_t, helper_negc, (uint32_t)) DEF_HELPER(void, helper_macl, (uint32_t, uint32_t)) DEF_HELPER(void, helper_macw, (uint32_t, uint32_t)) + +DEF_HELPER(uint32_t, helper_fadd_FT, (uint32_t, uint32_t, CPUState *)) +DEF_HELPER(uint64_t, helper_fadd_DT, (uint64_t, uint64_t, CPUState *)) Index: trunk/target-sh4/op_helper.c =================================================================== --- trunk/target-sh4/op_helper.c (revision 5116) +++ trunk/target-sh4/op_helper.c (working copy) @@ -388,3 +388,15 @@ env->sr &= ~SR_T; *addr = new; } + +uint32_t helper_fadd_FT(uint32_t t0, uint32_t t1, CPUState * env) +{ + float32 ret = float32_add(*(float32*)&t0, *(float32*)&t1, &env->fp_status); + return *(uint32_t*)(&ret); +} + +uint64_t helper_fadd_DT(uint64_t t0, uint64_t t1, CPUState * env) +{ + float64 ret = float64_add(*(float64*)&t0, *(float64*)&t1, &env->fp_status); + return *(uint64_t*)(&ret); +} Index: trunk/target-sh4/translate.c =================================================================== --- trunk/target-sh4/translate.c (revision 5116) +++ trunk/target-sh4/translate.c (working copy) @@ -69,6 +69,8 @@ /* dyngen register indexes */ static TCGv cpu_T[2]; +static TCGv cpu_FT[2]; +static TCGv cpu_DT[2]; #include "gen-icount.h" @@ -90,6 
+92,14 @@ cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env"); cpu_T[0] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG1, "T0"); cpu_T[1] = tcg_global_reg_new(TCG_TYPE_I32, TCG_AREG2, "T1"); + cpu_FT[0] = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, + offsetof(CPUState, ft0), "FT0"); + cpu_FT[1] = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, + offsetof(CPUState, ft1), "FT1"); + cpu_DT[0] = tcg_global_mem_new(TCG_TYPE_I64, TCG_AREG0, + offsetof(CPUState, dt0), "DT0"); + cpu_DT[1] = tcg_global_mem_new(TCG_TYPE_I64, TCG_AREG0, + offsetof(CPUState, dt1), "DT1"); for (i = 0; i < 24; i++) cpu_gregs[i] = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0, @@ -345,6 +355,42 @@ tcg_gen_ori_i32(cpu_flags, cpu_flags, flags); } +static inline void gen_ld_frN(TCGv ft, TCGv cpu_env, uint32_t reg) +{ + tcg_gen_ld_i32(ft, cpu_env, offsetof(CPUState, fregs[reg])); +} + +static inline void gen_ld_drN(TCGv dt, TCGv cpu_env, uint32_t reg) +{ + TCGv tmp = tcg_temp_new(TCG_TYPE_I64); + + tcg_gen_ld_i64(dt, cpu_env, offsetof(CPUState, fregs[reg])); + tcg_gen_shli_i64(tmp, dt, 32); + tcg_gen_shri_i64(dt, dt, 32); + tcg_gen_or_i64(dt, tmp, dt); + + tcg_temp_free(tmp); +} + +static inline void gen_st_frN(TCGv ft, TCGv cpu_env, uint32_t reg) +{ + tcg_gen_st_i32(ft, cpu_env, offsetof(CPUState, fregs[reg])); +} + +static inline void gen_st_drN(TCGv dt, TCGv cpu_env, uint32_t reg) +{ + TCGv tmp1 = tcg_temp_new(TCG_TYPE_I64); + TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64); + + tcg_gen_shli_i64(tmp1, dt, 32); + tcg_gen_shri_i64(tmp2, dt, 32); + tcg_gen_or_i64(tmp1, tmp1, tmp2); + tcg_gen_st_i64(tmp1, cpu_env, offsetof(CPUState, fregs[reg])); + + tcg_temp_free(tmp1); + tcg_temp_free(tmp2); +} + #define B3_0 (ctx->opcode & 0xf) #define B6_4 ((ctx->opcode >> 4) & 0x7) #define B7_4 ((ctx->opcode >> 4) & 0xf) @@ -811,12 +857,14 @@ tcg_gen_xor_i32(cpu_gregs[REG(B11_8)], cpu_gregs[REG(B11_8)], cpu_gregs[REG(B7_4)]); return; case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */ + /* 64 bit fmov code is not 
tested, because SH-Linux seems + not to set FPSCR_SZ flag true. */ if (ctx->fpscr & FPSCR_SZ) { - gen_op_fmov_drN_DT0(XREG(B7_4)); - gen_op_fmov_DT0_drN(XREG(B11_8)); + gen_ld_drN(cpu_DT[0], cpu_env, XREG(B7_4)); + gen_st_drN(cpu_DT[0], cpu_env, XREG(B11_8)); /* store the value just loaded into DT0 */ } else { - gen_op_fmov_frN_FT0(FREG(B7_4)); - gen_op_fmov_FT0_frN(FREG(B11_8)); + gen_ld_frN(cpu_FT[0], cpu_env, FREG(B7_4)); + gen_st_frN(cpu_FT[0], cpu_env, FREG(B11_8)); } return; case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */ @@ -905,17 +953,22 @@ if (ctx->fpscr & FPSCR_PR) { if (ctx->opcode & 0x0110) break; /* illegal instruction */ - gen_op_fmov_drN_DT1(DREG(B7_4)); - gen_op_fmov_drN_DT0(DREG(B11_8)); + gen_ld_drN(cpu_DT[1], cpu_env, DREG(B7_4)); + gen_ld_drN(cpu_DT[0], cpu_env, DREG(B11_8)); } else { - gen_op_fmov_frN_FT1(FREG(B7_4)); - gen_op_fmov_frN_FT0(FREG(B11_8)); + gen_ld_frN(cpu_FT[1], cpu_env, FREG(B7_4)); + gen_ld_frN(cpu_FT[0], cpu_env, FREG(B11_8)); } switch (ctx->opcode & 0xf00f) { case 0xf000: /* fadd Rm,Rn */ - ctx->fpscr & FPSCR_PR ? gen_op_fadd_DT() : gen_op_fadd_FT(); + if (ctx->fpscr & FPSCR_PR) + tcg_gen_helper_1_3(helper_fadd_DT, cpu_DT[0], + cpu_DT[0], cpu_DT[1], cpu_env); + else + tcg_gen_helper_1_3(helper_fadd_FT, cpu_FT[0], + cpu_FT[0], cpu_FT[1], cpu_env); break; case 0xf001: /* fsub Rm,Rn */ ctx->fpscr & FPSCR_PR ? gen_op_fsub_DT() : gen_op_fsub_FT(); @@ -935,10 +988,10 @@ } if (ctx->fpscr & FPSCR_PR) { - gen_op_fmov_DT0_drN(DREG(B11_8)); + gen_st_drN(cpu_DT[0], cpu_env, DREG(B11_8)); } else { - gen_op_fmov_FT0_frN(FREG(B11_8)); + gen_st_frN(cpu_FT[0], cpu_env, FREG(B11_8)); } return; }