qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/3] powerpc tcg backend improvements
@ 2015-10-12 21:23 Richard Henderson
  2015-10-12 21:23 ` [Qemu-devel] [PATCH 1/3] tcg/ppc: Adjust exit_tb for change in prologue placement Richard Henderson
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Richard Henderson @ 2015-10-12 21:23 UTC (permalink / raw)
  To: qemu-devel

I happened to notice the ppc backend had a dependency on the placement
of the prologue, which has just changed.  There is a 32 byte window at
code_gen_buffer + 16MB where we might do the wrong thing.

The second patch reduces the code size reserved for performing goto_tb
from 7 insns to 4.  We probably haven't emitted all 7 insns for quite
some time, since the TCG_REG_RA patch went in.  In the process, the
patch also allows for the atomic update of the insns, something that
I could see being required eventually.

The third patch is a guess.  But it matches the preferences in gcc,
giving out-of-order processors a tad more freedom by avoiding cr0
when possible.

Anyway, I've been running an alpha guest on a ppc64le host with these
patches for a week now.


r~


Richard Henderson (3):
  tcg/ppc: Adjust exit_tb for change in prologue placement
  tcg/ppc: Revise goto_tb implementation
  tcg/ppc: Prefer mask over andi.

 tcg/ppc/tcg-target.c | 79 ++++++++++++++++++++++++++++++++++------------------
 translate-all.c      |  2 ++
 2 files changed, 54 insertions(+), 27 deletions(-)

-- 
2.4.3

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH 1/3] tcg/ppc: Adjust exit_tb for change in prologue placement
  2015-10-12 21:23 [Qemu-devel] [PATCH 0/3] powerpc tcg backend improvements Richard Henderson
@ 2015-10-12 21:23 ` Richard Henderson
  2015-10-12 21:23 ` [Qemu-devel] [PATCH 2/3] tcg/ppc: Revise goto_tb implementation Richard Henderson
  2015-10-12 21:23 ` [Qemu-devel] [PATCH 3/3] tcg/ppc: Prefer mask over andi Richard Henderson
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2015-10-12 21:23 UTC (permalink / raw)
  To: qemu-devel

Moving the prologue to the beginning of the code_gen_buffer
changes the direction of the "return" branch from forward to
backward.  Adjust the in-range check to match.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ppc/tcg-target.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 92ef719..fd7a3e0 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -1855,12 +1855,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         if (USE_REG_RA) {
             ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);
 
-            /* If we can use a direct branch, otherwise use the value in RA.
-               Note that the direct branch is always forward.  If it's in
-               range now, it'll still be in range after the movi.  Don't
-               bother about the 20 bytes where the test here fails but it
-               would succeed below.  */
-            if (!in_range_b(disp)) {
+            /* Use a direct branch if we can, otherwise use the value in RA.
+               Note that the direct branch is always backward, thus we need
+               to account for the possibility of 5 insns from the movi.  */
+            if (!in_range_b(disp - 20)) {
                 tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
                 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
                 tcg_out32(s, BCCTR | BO_ALWAYS);
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH 2/3] tcg/ppc: Revise goto_tb implementation
  2015-10-12 21:23 [Qemu-devel] [PATCH 0/3] powerpc tcg backend improvements Richard Henderson
  2015-10-12 21:23 ` [Qemu-devel] [PATCH 1/3] tcg/ppc: Adjust exit_tb for change in prologue placement Richard Henderson
@ 2015-10-12 21:23 ` Richard Henderson
  2015-10-12 21:23 ` [Qemu-devel] [PATCH 3/3] tcg/ppc: Prefer mask over andi Richard Henderson
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2015-10-12 21:23 UTC (permalink / raw)
  To: qemu-devel

Restrict the size of code_gen_buffer to 2GB on ppc64, which
lets us assert that everything is reachable with addis+addi
from tb_ret_addr.  This lets us use a max of 4 insns for goto_tb
instead of 7.

Emit the indirect branch portion of goto_tb up front, which
means we only have to update two insns to update any link.
With a 64-bit store, we can update the link atomically, which
may be required in the future.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ppc/tcg-target.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
 translate-all.c      |  2 ++
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index fd7a3e0..cee13e0 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -1239,11 +1239,36 @@ static void tcg_out_brcond2 (TCGContext *s, const TCGArg *args,
 
 void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
-    TCGContext s;
+    tcg_insn_unit i1, i2;
+    uint64_t pair;
+    intptr_t diff = addr - jmp_addr;
 
-    s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr;
-    tcg_out_b(&s, 0, (tcg_insn_unit *)addr);
-    flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s));
+    if (in_range_b(diff)) {
+        i1 = B | (diff & 0x3fffffc);
+        i2 = NOP;
+    } else if (USE_REG_RA) {
+        intptr_t lo, hi;
+        diff = addr - (uintptr_t)tb_ret_addr;
+        lo = (int16_t)diff;
+        hi = (int32_t)(diff - lo);
+        assert(diff == hi + lo);
+        i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16);
+        i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo);
+    } else {
+        assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr);
+        i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16);
+        i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr);
+    }
+#ifdef HOST_WORDS_BIGENDIAN
+    pair = (uint64_t)i1 << 32 | i2;
+#else
+    pair = (uint64_t)i2 << 32 | i1;
+#endif
+
+    /* ??? __atomic_store_8, presuming there's some way to do that
+       for 32-bit, otherwise this is good enough for 64-bit.  */
+    *(uint64_t *)jmp_addr = pair;
+    flush_icache_range(jmp_addr, jmp_addr + 8);
 }
 
 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
@@ -1869,14 +1894,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
         tcg_out_b(s, 0, tb_ret_addr);
         break;
     case INDEX_op_goto_tb:
-        if (s->tb_jmp_offset) {
-            /* Direct jump method.  */
-            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
-            s->code_ptr += 7;
-        } else {
-            /* Indirect jump method.  */
-            tcg_abort();
+        tcg_debug_assert(s->tb_jmp_offset);
+        /* Direct jump.  Ensure the next insns are 8-byte aligned. */
+        if ((uintptr_t)s->code_ptr & 7) {
+            tcg_out32(s, NOP);
         }
+        s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
+        /* To be replaced by either a branch+nop or a load into TMP1.  */
+        s->code_ptr += 2;
+        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
+        tcg_out32(s, BCCTR | BO_ALWAYS);
         s->tb_next_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_br:
diff --git a/translate-all.c b/translate-all.c
index 333eba4..20ce40e 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -468,6 +468,8 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__sparc__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__powerpc64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__aarch64__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
 #elif defined(__arm__)
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH 3/3] tcg/ppc: Prefer mask over andi.
  2015-10-12 21:23 [Qemu-devel] [PATCH 0/3] powerpc tcg backend improvements Richard Henderson
  2015-10-12 21:23 ` [Qemu-devel] [PATCH 1/3] tcg/ppc: Adjust exit_tb for change in prologue placement Richard Henderson
  2015-10-12 21:23 ` [Qemu-devel] [PATCH 2/3] tcg/ppc: Revise goto_tb implementation Richard Henderson
@ 2015-10-12 21:23 ` Richard Henderson
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2015-10-12 21:23 UTC (permalink / raw)
  To: qemu-devel

Prefer the instruction that isn't required to modify cr0.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ppc/tcg-target.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index cee13e0..2c72565 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -700,14 +700,14 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
 {
     int mb, me;
 
-    if ((c & 0xffff) == c) {
+    if (mask_operand(c, &mb, &me)) {
+        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
+    } else if ((c & 0xffff) == c) {
         tcg_out32(s, ANDI | SAI(src, dst, c));
         return;
     } else if ((c & 0xffff0000) == c) {
         tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
         return;
-    } else if (mask_operand(c, &mb, &me)) {
-        tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R0, c);
         tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
@@ -719,18 +719,18 @@ static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
     int mb, me;
 
     assert(TCG_TARGET_REG_BITS == 64);
-    if ((c & 0xffff) == c) {
-        tcg_out32(s, ANDI | SAI(src, dst, c));
-        return;
-    } else if ((c & 0xffff0000) == c) {
-        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
-        return;
-    } else if (mask64_operand(c, &mb, &me)) {
+    if (mask64_operand(c, &mb, &me)) {
         if (mb == 0) {
             tcg_out_rld(s, RLDICR, dst, src, 0, me);
         } else {
             tcg_out_rld(s, RLDICL, dst, src, 0, mb);
         }
+    } else if ((c & 0xffff) == c) {
+        tcg_out32(s, ANDI | SAI(src, dst, c));
+        return;
+    } else if ((c & 0xffff0000) == c) {
+        tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
+        return;
     } else {
         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, c);
         tcg_out32(s, AND | SAB(src, dst, TCG_REG_R0));
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-10-12 21:24 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-12 21:23 [Qemu-devel] [PATCH 0/3] powerpc tcg backend improvements Richard Henderson
2015-10-12 21:23 ` [Qemu-devel] [PATCH 1/3] tcg/ppc: Adjust exit_tb for change in prologue placement Richard Henderson
2015-10-12 21:23 ` [Qemu-devel] [PATCH 2/3] tcg/ppc: Revise goto_tb implementation Richard Henderson
2015-10-12 21:23 ` [Qemu-devel] [PATCH 3/3] tcg/ppc: Prefer mask over andi Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).