qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] SH4 : convert branch/jump instructions to TCG
@ 2008-08-29 20:15 Shin-ichiro KAWASAKI
  2008-08-29 22:32 ` Aurelien Jarno
  0 siblings, 1 reply; 2+ messages in thread
From: Shin-ichiro KAWASAKI @ 2008-08-29 20:15 UTC (permalink / raw)
  To: qemu-devel

This patch is another contribution to the TCG work on SH4.

All branch/jump instructions are converted to a TCG implementation.
The fields 'flags' and 'delayed_pc' of the CPUState structure are mapped as
TCG global memory variables.  These fields are not real registers, but they
are accessed so often that I think they are worth mapping as TCG vars.

This patch is against rev. 5107, and it will cause a small patch failure if
you apply it after merging the patch I sent two hours ago.
I can merge these patches myself if necessary.

Regards,
Shin-ichiro KAWASAKI


Index: trunk/target-sh4/op.c
===================================================================
--- trunk/target-sh4/op.c	(revision 5107)
+++ trunk/target-sh4/op.c	(working copy)
@@ -43,70 +43,6 @@
     RETURN();
 }
 
-void OPPROTO op_bf_s(void)
-{
-    env->delayed_pc = PARAM1;
-    if (!(env->sr & SR_T)) {
-        env->flags |= DELAY_SLOT_TRUE;
-    }
-    RETURN();
-}
-
-void OPPROTO op_bt_s(void)
-{
-    env->delayed_pc = PARAM1;
-    if (env->sr & SR_T) {
-        env->flags |= DELAY_SLOT_TRUE;
-    }
-    RETURN();
-}
-
-void OPPROTO op_store_flags(void)
-{
-    env->flags &= DELAY_SLOT_TRUE;
-    env->flags |= PARAM1;
-    RETURN();
-}
-
-void OPPROTO op_bra(void)
-{
-    env->delayed_pc = PARAM1;
-    RETURN();
-}
-
-void OPPROTO op_braf_T0(void)
-{
-    env->delayed_pc = PARAM1 + T0;
-    RETURN();
-}
-
-void OPPROTO op_bsr(void)
-{
-    env->pr = PARAM1;
-    env->delayed_pc = PARAM2;
-    RETURN();
-}
-
-void OPPROTO op_bsrf_T0(void)
-{
-    env->pr = PARAM1;
-    env->delayed_pc = PARAM1 + T0;
-    RETURN();
-}
-
-void OPPROTO op_jsr_T0(void)
-{
-    env->pr = PARAM1;
-    env->delayed_pc = T0;
-    RETURN();
-}
-
-void OPPROTO op_rts(void)
-{
-    env->delayed_pc = env->pr;
-    RETURN();
-}
-
 void OPPROTO op_ldtlb(void)
 {
     helper_ldtlb();
@@ -125,13 +61,6 @@
     RETURN();
 }
 
-void OPPROTO op_rte(void)
-{
-    env->sr = env->ssr;
-    env->delayed_pc = env->spc;
-    RETURN();
-}
-
 void OPPROTO op_addc_T0_T1(void)
 {
     helper_addc_T0_T1();
@@ -311,12 +240,6 @@
     RETURN();
 }
 
-void OPPROTO op_jmp_T0(void)
-{
-    env->delayed_pc = T0;
-    RETURN();
-}
-
 void OPPROTO op_ldcl_rMplus_rN_bank(void)
 {
     env->gregs[PARAM2] = env->gregs[PARAM1];
@@ -634,28 +557,6 @@
     RETURN();
 }
 
-void OPPROTO op_jT(void)
-{
-    if (env->sr & SR_T)
-	GOTO_LABEL_PARAM(1);
-    RETURN();
-}
-
-void OPPROTO op_jdelayed(void)
-{
-    if (env->flags & DELAY_SLOT_TRUE) {
-        env->flags &= ~DELAY_SLOT_TRUE;
-        GOTO_LABEL_PARAM(1);
-    }
-    RETURN();
-}
-
-void OPPROTO op_movl_delayed_pc_PC(void)
-{
-    env->pc = env->delayed_pc;
-    RETURN();
-}
-
 void OPPROTO op_tst_imm_T0(void)
 {
     cond_t((T0 & PARAM1) == 0);
Index: trunk/target-sh4/translate.c
===================================================================
--- trunk/target-sh4/translate.c	(revision 5107)
+++ trunk/target-sh4/translate.c	(working copy)
@@ -62,7 +62,7 @@
 static TCGv cpu_gregs[24];
 static TCGv cpu_pc, cpu_sr, cpu_ssr, cpu_spc, cpu_gbr;
 static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl;
-static TCGv cpu_pr, cpu_fpscr, cpu_fpul;
+static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_flags, cpu_delayed_pc;
 
 /* dyngen register indexes */
 static TCGv cpu_T[2];
@@ -119,6 +119,11 @@
                                    offsetof(CPUState, fpscr), "FPSCR");
     cpu_fpul = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
                                   offsetof(CPUState, fpul), "FPUL");
+    cpu_flags = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+				   offsetof(CPUState, flags), "FLAGS");
+    cpu_delayed_pc = tcg_global_mem_new(TCG_TYPE_I32, TCG_AREG0,
+					offsetof(CPUState, delayed_pc),
+					"DELAYED_PC");
 
     /* register helpers */
 #undef DEF_HELPER
@@ -249,7 +254,7 @@
     if (ctx->delayed_pc == (uint32_t) - 1) {
 	/* Target is not statically known, it comes necessarily from a
 	   delayed jump as immediate jump are conditinal jumps */
-	gen_op_movl_delayed_pc_PC();
+	tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
 	if (ctx->singlestep_enabled)
 	    gen_op_debug();
 	tcg_gen_exit_tb(0);
@@ -258,6 +263,16 @@
     }
 }
 
+static inline void gen_branch_slot(uint32_t delayed_pc, int true)
+{
+    int label = gen_new_label();
+    tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc);
+    tcg_gen_andi_i32(cpu_T[0], cpu_sr, SR_T);
+    tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0], true ? SR_T : 0, label);
+    tcg_gen_ori_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
+    gen_set_label(label);
+}
+
 /* Immediate conditional jump (bt or bf) */
 static void gen_conditional_jump(DisasContext * ctx,
 				 target_ulong ift, target_ulong ifnott)
@@ -265,7 +280,8 @@
     int l1;
 
     l1 = gen_new_label();
-    gen_op_jT(l1);
+    tcg_gen_andi_i32(cpu_T[0], cpu_sr, SR_T);
+    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_T[0], SR_T, l1);
     gen_goto_tb(ctx, 0, ifnott);
     gen_set_label(l1);
     gen_goto_tb(ctx, 1, ift);
@@ -277,12 +293,20 @@
     int l1;
 
     l1 = gen_new_label();
-    gen_op_jdelayed(l1);
+    tcg_gen_andi_i32(cpu_T[0], cpu_flags, DELAY_SLOT_TRUE);
+    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_T[0], DELAY_SLOT_TRUE, l1);
     gen_goto_tb(ctx, 1, ctx->pc + 2);
     gen_set_label(l1);
+    tcg_gen_andi_i32(cpu_flags, cpu_flags, ~DELAY_SLOT_TRUE);
     gen_jump(ctx);
 }
 
+static inline void gen_store_flags(uint32_t flags)
+{
+    tcg_gen_andi_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE);
+    tcg_gen_ori_i32(cpu_flags, cpu_flags, flags);
+}
+
 #define B3_0 (ctx->opcode & 0xf)
 #define B6_4 ((ctx->opcode >> 4) & 0x7)
 #define B7_4 ((ctx->opcode >> 4) & 0xf)
@@ -319,7 +343,8 @@
 	tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(SR_M | SR_Q | SR_T));
 	return;
     case 0x000b:		/* rts */
-	CHECK_NOT_DELAY_SLOT gen_op_rts();
+        CHECK_NOT_DELAY_SLOT
+        tcg_gen_mov_i32(cpu_delayed_pc, cpu_pr);
 	ctx->flags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
@@ -341,7 +366,9 @@
 #endif
 	return;
     case 0x002b:		/* rte */
-	CHECK_NOT_DELAY_SLOT gen_op_rte();
+	CHECK_NOT_DELAY_SLOT
+        tcg_gen_mov_i32(cpu_sr, cpu_ssr);
+        tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
 	ctx->flags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
@@ -402,13 +429,15 @@
 	return;
     case 0xa000:		/* bra disp */
 	CHECK_NOT_DELAY_SLOT
-	    gen_op_bra(ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2);
+	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
+	tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
 	ctx->flags |= DELAY_SLOT;
 	return;
     case 0xb000:		/* bsr disp */
-	CHECK_NOT_DELAY_SLOT
-	    gen_op_bsr(ctx->pc + 4, ctx->delayed_pc =
-		       ctx->pc + 4 + B11_0s * 2);
+        CHECK_NOT_DELAY_SLOT
+	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
+	ctx->delayed_pc = ctx->pc + 4 + B11_0s * 2;
+	tcg_gen_movi_i32(cpu_delayed_pc, ctx->delayed_pc);
 	ctx->flags |= DELAY_SLOT;
 	return;
     }
@@ -895,7 +924,7 @@
 	return;
     case 0x8f00:		/* bf/s label */
 	CHECK_NOT_DELAY_SLOT
-	    gen_op_bf_s(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2);
+        gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 0);
 	ctx->flags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8900:		/* bt label */
@@ -906,7 +935,7 @@
 	return;
     case 0x8d00:		/* bt/s label */
 	CHECK_NOT_DELAY_SLOT
-	    gen_op_bt_s(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2);
+        gen_branch_slot(ctx->delayed_pc = ctx->pc + 4 + B7_0s * 2, 1);
 	ctx->flags |= DELAY_SLOT_CONDITIONAL;
 	return;
     case 0x8800:		/* cmp/eq #imm,R0 */
@@ -1046,13 +1075,14 @@
     switch (ctx->opcode & 0xf0ff) {
     case 0x0023:		/* braf Rn */
 	CHECK_NOT_DELAY_SLOT tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
-	gen_op_braf_T0(ctx->pc + 4);
+	tcg_gen_addi_i32(cpu_delayed_pc, cpu_T[0], ctx->pc + 4);
 	ctx->flags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x0003:		/* bsrf Rn */
 	CHECK_NOT_DELAY_SLOT tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
-	gen_op_bsrf_T0(ctx->pc + 4);
+	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
+	tcg_gen_add_i32(cpu_delayed_pc, cpu_T[0], cpu_pr);
 	ctx->flags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
@@ -1069,13 +1099,14 @@
 	return;
     case 0x402b:		/* jmp @Rn */
 	CHECK_NOT_DELAY_SLOT tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
-	gen_op_jmp_T0();
+	tcg_gen_mov_i32(cpu_delayed_pc, cpu_T[0]);
 	ctx->flags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
     case 0x400b:		/* jsr @Rn */
 	CHECK_NOT_DELAY_SLOT tcg_gen_mov_i32(cpu_T[0], cpu_gregs[REG(B11_8)]);
-	gen_op_jsr_T0(ctx->pc + 4);
+	tcg_gen_movi_i32(cpu_pr, ctx->pc + 4);
+	tcg_gen_mov_i32(cpu_delayed_pc, cpu_T[0]);
 	ctx->flags |= DELAY_SLOT;
 	ctx->delayed_pc = (uint32_t) - 1;
 	return;
@@ -1294,12 +1325,12 @@
 
     if (old_flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
         if (ctx->flags & DELAY_SLOT_CLEARME) {
-            gen_op_store_flags(0);
+            gen_store_flags(0);
         } else {
 	    /* go out of the delay slot */
 	    uint32_t new_flags = ctx->flags;
 	    new_flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
-	    gen_op_store_flags(new_flags);
+	    gen_store_flags(new_flags);
         }
         ctx->flags = 0;
         ctx->bstate = BS_BRANCH;
@@ -1313,7 +1344,7 @@
 
     /* go into a delay slot */
     if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))
-        gen_op_store_flags(ctx->flags);
+        gen_store_flags(ctx->flags);
 }
 
 static inline void
@@ -1410,7 +1441,7 @@
             /* fall through */
         case BS_NONE:
             if (ctx.flags) {
-                gen_op_store_flags(ctx.flags | DELAY_SLOT_CLEARME);
+                gen_store_flags(ctx.flags | DELAY_SLOT_CLEARME);
 	    }
             gen_goto_tb(&ctx, 0, ctx.pc);
             break;

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [Qemu-devel] [PATCH] SH4 : convert branch/jump instructions to TCG
  2008-08-29 20:15 [Qemu-devel] [PATCH] SH4 : convert branch/jump instructions to TCG Shin-ichiro KAWASAKI
@ 2008-08-29 22:32 ` Aurelien Jarno
  0 siblings, 0 replies; 2+ messages in thread
From: Aurelien Jarno @ 2008-08-29 22:32 UTC (permalink / raw)
  To: qemu-devel

On Sat, Aug 30, 2008 at 05:15:49AM +0900, Shin-ichiro KAWASAKI wrote:
> This patch is another contributes to TCG work on SH4.
> 
> All branch/jump instructions are converted into TCG implementation.
> The fields 'flags' and 'delayed_pc' of CPUState structure are mapped as
> TCG global memory variables.  These fields are not real registers but 
> accessed so often. I guess they worth TCG vars.

I also agree that we should map them using TCG vars. However, I have
changed their names, so that they appear differently in the debugging
output.

> This patch is for rev.5107, and will cause a small patch failure when you
> patch it after merging the patch I send two hours before. 
> I merge these patches if necessery.

I have done the merge manually and applied the patch, thanks.

-- 
  .''`.  Aurelien Jarno	            | GPG: 1024D/F1BCDB73
 : :' :  Debian developer           | Electrical Engineer
 `. `'   aurel32@debian.org         | aurelien@aurel32.net
   `-    people.debian.org/~aurel32 | www.aurel32.net

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2008-08-29 22:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-08-29 20:15 [Qemu-devel] [PATCH] SH4 : convert branch/jump instructions to TCG Shin-ichiro KAWASAKI
2008-08-29 22:32 ` Aurelien Jarno

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).