All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ulrich Hecht <uli@suse.de>
To: Aurelien Jarno <aurelien@aurel32.net>
Cc: qemu-devel@nongnu.org, agraf@suse.de
Subject: Re: [Qemu-devel] [PATCH 2/9] S/390 CPU emulation
Date: Mon, 2 Nov 2009 17:16:44 +0200	[thread overview]
Message-ID: <200911021616.45102.uli@suse.de> (raw)
In-Reply-To: <20091022212854.GW1883@hall.aurel32.net>

[-- Attachment #1: Type: text/plain, Size: 896 bytes --]

On Thursday 22 October 2009, Aurelien Jarno wrote:
> Probably the second. Changing the instruction pointer in the helper
> instead of using the proper goto_tb TCG op prevents TB chaining, and
> therefore as a huge impact on performance.
>
> It's something not difficult to implement, and that I would definitely
> want to see in the patch before getting it merged.

OK, I implemented it, and the surprising result is that performance  does 
not get any better; in fact it even suffers a little bit. (My standard 
quick test, the polarssl test suite, shows about a 2% performance impact 
when profiled with cachegrind).

Could there be anything I overlooked? I modelled my implementation after 
those in the existing targets. (See the attached patch that goes on top 
of my other S/390 patches.)

CU
Uli

-- 
SUSE LINUX Products GmbH, GF: Markus Rex, HRB 16746 (AG Nürnberg)

[-- Attachment #2: s390-goto_tb.patch --]
[-- Type: text/x-diff, Size: 5423 bytes --]

diff --git a/target-s390/helpers.h b/target-s390/helpers.h
index 0d16760..6009312 100644
--- a/target-s390/helpers.h
+++ b/target-s390/helpers.h
@@ -15,7 +15,6 @@ DEF_HELPER_FLAGS_1(set_cc_comp_s64, TCG_CALL_PURE|TCG_CALL_CONST, i32, s64)
 DEF_HELPER_FLAGS_1(set_cc_nz_u32, TCG_CALL_PURE|TCG_CALL_CONST, i32, i32)
 DEF_HELPER_FLAGS_1(set_cc_nz_u64, TCG_CALL_PURE|TCG_CALL_CONST, i32, i64)
 DEF_HELPER_FLAGS_2(set_cc_icm, TCG_CALL_PURE|TCG_CALL_CONST, i32, i32, i32)
-DEF_HELPER_4(brc, void, i32, i32, i64, s32)
 DEF_HELPER_3(brctg, void, i64, i64, s32)
 DEF_HELPER_3(brct, void, i32, i64, s32)
 DEF_HELPER_4(brcl, void, i32, i32, i64, s64)
diff --git a/target-s390/op_helper.c b/target-s390/op_helper.c
index 637d22f..f7f52ba 100644
--- a/target-s390/op_helper.c
+++ b/target-s390/op_helper.c
@@ -218,17 +218,6 @@ uint32_t HELPER(set_cc_icm)(uint32_t mask, uint32_t val)
     return cc;
 }
 
-/* relative conditional branch */
-void HELPER(brc)(uint32_t cc, uint32_t mask, uint64_t pc, int32_t offset)
-{
-    if ( mask & ( 1 << (3 - cc) ) ) {
-        env->psw.addr = pc + offset;
-    }
-    else {
-        env->psw.addr = pc + 4;
-    }
-}
-
 /* branch relative on 64-bit count (condition is computed inline, this only
    does the branch */
 void HELPER(brctg)(uint64_t flag, uint64_t pc, int32_t offset)
diff --git a/target-s390/translate.c b/target-s390/translate.c
index 9ffa7bd..5a7cfe7 100644
--- a/target-s390/translate.c
+++ b/target-s390/translate.c
@@ -49,6 +49,7 @@ struct DisasContext {
     uint64_t pc;
     int is_jmp;
     CPUS390XState *env;
+    struct TranslationBlock *tb;
 };
 
 #define DISAS_EXCP 4
@@ -359,23 +360,55 @@ static void gen_bcr(uint32_t mask, int tr, uint64_t offset)
     tcg_temp_free(target);
 }
 
-static void gen_brc(uint32_t mask, uint64_t pc, int32_t offset)
+static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong pc)
 {
-    TCGv p;
-    TCGv_i32 m, o;
+    TranslationBlock *tb;
+
+    tb = s->tb;
+    /* NOTE: we handle the case where the TB spans two pages here */
+    if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
+        (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK))  {
+        /* jump to same page: we can use a direct jump */
+        tcg_gen_mov_i32(global_cc, cc);
+        tcg_gen_goto_tb(tb_num);
+        tcg_gen_movi_i64(psw_addr, pc);
+        tcg_gen_exit_tb((long)tb + tb_num);
+    } else {
+        /* jump to another page: currently not optimized */
+        tcg_gen_movi_i64(psw_addr, pc);
+        tcg_gen_mov_i32(global_cc, cc);
+        tcg_gen_exit_tb(0);
+    }
+}
+
+static void gen_brc(uint32_t mask, DisasContext *s, int32_t offset)
+{
+    TCGv_i32 r;
+    TCGv_i32 tmp, tmp2;
+    int skip;
     
     if (mask == 0xf) {	/* unconditional */
-      tcg_gen_movi_i64(psw_addr, pc + offset);
+      //tcg_gen_movi_i64(psw_addr, s->pc + offset);
+      gen_goto_tb(s, 0, s->pc + offset);
     }
     else {
-      m = tcg_const_i32(mask);
-      p = tcg_const_i64(pc);
-      o = tcg_const_i32(offset);
-      gen_helper_brc(cc, m, p, o);
-      tcg_temp_free(m);
-      tcg_temp_free(p);
-      tcg_temp_free(o);
+      tmp = tcg_const_i32(3);
+      tcg_gen_sub_i32(tmp, tmp, cc);	/* 3 - cc */
+      tmp2 = tcg_const_i32(1);
+      tcg_gen_shl_i32(tmp2, tmp2, tmp);	/* 1 << (3 - cc) */
+      r = tcg_const_i32(mask);
+      tcg_gen_and_i32(r, r, tmp2);	/* mask & (1 << (3 - cc)) */
+      tcg_temp_free(tmp);
+      tcg_temp_free(tmp2);
+      skip = gen_new_label();
+      tcg_gen_brcondi_i32(TCG_COND_EQ, r, 0, skip);
+      gen_goto_tb(s, 0, s->pc + offset);
+      gen_set_label(skip);
+      gen_goto_tb(s, 1, s->pc + 4);
+      tcg_gen_mov_i32(global_cc, cc);
+      tcg_temp_free(r);
     }
+    s->is_jmp = DISAS_TB_JUMP;
 }
 
 static void gen_set_cc_add64(TCGv v1, TCGv v2, TCGv vr)
@@ -1143,9 +1176,7 @@ static void disas_a7(DisasContext *s, int op, int r1, int i2)
         tcg_temp_free(tmp2);
         break;
     case 0x4: /* brc m1, i2 */
-        /* FIXME: optimize m1 == 0xf (unconditional) case */
-        gen_brc(r1, s->pc, i2 * 2);
-        s->is_jmp = DISAS_JUMP;
+        gen_brc(r1, s, i2 * 2);
         return;
     case 0x5: /* BRAS     R1,I2     [RI] */
         tmp = tcg_const_i64(s->pc + 4);
@@ -2739,6 +2770,7 @@ static inline void gen_intermediate_code_internal (CPUState *env,
     dc.env = env;
     dc.pc = pc_start;
     dc.is_jmp = DISAS_NEXT;
+    dc.tb = tb;
     
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
     
@@ -2778,8 +2810,11 @@ static inline void gen_intermediate_code_internal (CPUState *env,
         num_insns++;
     } while (!dc.is_jmp && gen_opc_ptr < gen_opc_end && dc.pc < next_page_start
              && num_insns < max_insns && !env->singlestep_enabled);
-    tcg_gen_mov_i32(global_cc, cc);
-    tcg_temp_free(cc);
+
+    if (dc.is_jmp != DISAS_TB_JUMP) {
+        tcg_gen_mov_i32(global_cc, cc);
+        tcg_temp_free(cc);
+    }
     
     if (!dc.is_jmp) {
         tcg_gen_st_i64(tcg_const_i64(dc.pc), cpu_env, offsetof(CPUState, psw.addr));
@@ -2794,7 +2829,9 @@ static inline void gen_intermediate_code_internal (CPUState *env,
     if (tb->cflags & CF_LAST_IO)
         gen_io_end();
     /* Generate the return instruction */
-    tcg_gen_exit_tb(0);
+    if (dc.is_jmp != DISAS_TB_JUMP) {
+        tcg_gen_exit_tb(0);
+    }
     gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
     if (search_pc) {

  reply	other threads:[~2009-11-02 15:15 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-16 12:38 [Qemu-devel] [PATCH 0/9] S/390 support updated Ulrich Hecht
2009-10-16 12:38 ` [Qemu-devel] [PATCH 1/9] TCG "sync" op Ulrich Hecht
2009-10-16 12:38   ` [Qemu-devel] [PATCH 2/9] S/390 CPU emulation Ulrich Hecht
2009-10-16 12:38     ` [Qemu-devel] [PATCH 3/9] S/390 host/target build system support Ulrich Hecht
2009-10-16 12:38       ` [Qemu-devel] [PATCH 4/9] S/390 host support for TCG Ulrich Hecht
2009-10-16 12:38         ` [Qemu-devel] [PATCH 5/9] linux-user: S/390 64-bit (s390x) support Ulrich Hecht
2009-10-16 12:38           ` [Qemu-devel] [PATCH 6/9] linux-user: don't do locking in single-threaded processes Ulrich Hecht
2009-10-16 12:38             ` [Qemu-devel] [PATCH 7/9] linux-user: dup3, fallocate syscalls Ulrich Hecht
2009-10-16 12:38               ` [Qemu-devel] [PATCH 8/9] linux-user: define a couple of syscalls for non-uid16 targets Ulrich Hecht
2009-10-16 12:38                 ` [Qemu-devel] [PATCH 9/9] linux-user: getpriority errno fix Ulrich Hecht
2009-10-17 10:44       ` [Qemu-devel] [PATCH 3/9] S/390 host/target build system support Aurelien Jarno
2009-10-17 10:42     ` [Qemu-devel] [PATCH 2/9] S/390 CPU emulation Aurelien Jarno
2009-10-19 17:17       ` Ulrich Hecht
2009-10-22 21:28         ` Aurelien Jarno
2009-11-02 15:16           ` Ulrich Hecht [this message]
2009-11-02 18:42             ` Aurelien Jarno
2009-11-02 19:03               ` Laurent Desnogues
2009-11-09 16:55                 ` Ulrich Hecht
2009-11-10 16:02                   ` Aurelien Jarno
2009-11-11 10:46                     ` Ulrich Hecht
2009-10-16 15:52   ` [Qemu-devel] [PATCH 1/9] TCG "sync" op Aurelien Jarno
2009-10-16 16:37     ` Ulrich Hecht
2009-10-16 17:29       ` Aurelien Jarno
2009-10-19 17:17         ` Ulrich Hecht
2009-10-17  8:59     ` Edgar E. Iglesias
2009-10-19 17:17       ` Ulrich Hecht

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200911021616.45102.uli@suse.de \
    --to=uli@suse.de \
    --cc=agraf@suse.de \
    --cc=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.