qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v4 1/3] tcg/aarch64: Introduce and use long branch to register
@ 2017-06-30 14:36 Pranith Kumar
  2017-06-30 14:36 ` [Qemu-devel] [PATCH v4 2/3] tcg/aarch64: Use ADRP+ADD to compute target address Pranith Kumar
  2017-06-30 14:36 ` [Qemu-devel] [PATCH v4 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal) Pranith Kumar
  0 siblings, 2 replies; 5+ messages in thread
From: Pranith Kumar @ 2017-06-30 14:36 UTC (permalink / raw)
  To: alex.bennee; +Cc: qemu-devel, rth

We can use a branch to register instruction for exit_tb for offsets
greater than 128MB.

CC: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 tcg/aarch64/tcg-target.inc.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 1fa3bccc89..8fce11ace7 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -819,6 +819,17 @@ static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
     tcg_out_insn(s, 3206, B, offset);
 }
 
+static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
+{
+    ptrdiff_t offset = target - s->code_ptr;
+    if (offset == sextract64(offset, 0, 26)) {
+        tcg_out_insn(s, 3206, BL, offset);
+    } else {
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
+        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
+    }
+}
+
 static inline void tcg_out_goto_noaddr(TCGContext *s)
 {
     /* We pay attention here to not modify the branch target by reading from
@@ -1364,10 +1375,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_exit_tb:
         /* Reuse the zeroing that exists for goto_ptr.  */
         if (a0 == 0) {
-            tcg_out_goto(s, s->code_gen_epilogue);
+            tcg_out_goto_long(s, s->code_gen_epilogue);
         } else {
             tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
-            tcg_out_goto(s, tb_ret_addr);
+            tcg_out_goto_long(s, tb_ret_addr);
         }
         break;
 
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH v4 2/3] tcg/aarch64: Use ADRP+ADD to compute target address
  2017-06-30 14:36 [Qemu-devel] [PATCH v4 1/3] tcg/aarch64: Introduce and use long branch to register Pranith Kumar
@ 2017-06-30 14:36 ` Pranith Kumar
  2017-06-30 16:08   ` Richard Henderson
  2017-06-30 14:36 ` [Qemu-devel] [PATCH v4 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal) Pranith Kumar
  1 sibling, 1 reply; 5+ messages in thread
From: Pranith Kumar @ 2017-06-30 14:36 UTC (permalink / raw)
  To: alex.bennee; +Cc: qemu-devel, rth

We use ADRP+ADD to compute the target address for goto_tb. This patch
introduces the NOP instruction which is used to align the above
instruction pair so that we can use one atomic instruction to patch
the destination offsets.

CC: Richard Henderson <rth@twiddle.net>
CC: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 accel/tcg/translate-all.c    |  2 +-
 tcg/aarch64/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++------
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index f6ad46b613..65a92dbf67 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -522,7 +522,7 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 #elif defined(__powerpc__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (32u * 1024 * 1024)
 #elif defined(__aarch64__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__s390x__)
   /* We have a +- 4GB range on the branches; leave some slop.  */
 # define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 8fce11ace7..a84422d633 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -372,6 +372,7 @@ typedef enum {
     I3510_EON       = 0x4a200000,
     I3510_ANDS      = 0x6a000000,
 
+    NOP             = 0xd503201f,
     /* System instructions.  */
     DMB_ISH         = 0xd50338bf,
     DMB_LD          = 0x00000100,
@@ -865,11 +866,27 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
 
 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
-    tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
-    tcg_insn_unit *target = (tcg_insn_unit *)addr;
+    tcg_insn_unit i1, i2;
+    TCGType rt = TCG_TYPE_I64;
+    TCGReg  rd = TCG_REG_TMP;
+    uint64_t pair;
 
-    reloc_pc26_atomic(code_ptr, target);
-    flush_icache_range(jmp_addr, jmp_addr + 4);
+    ptrdiff_t offset = addr - jmp_addr;
+
+    if (offset == sextract64(offset, 0, 26)) {
+        i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
+        i2 = NOP;
+    } else {
+        offset = (addr >> 12) - (jmp_addr >> 12);
+
+        /* patch ADRP */
+        i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
+        /* patch ADDI */
+        i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
+    }
+    pair = (uint64_t)i2 << 32 | i1;
+    atomic_set((uint64_t *)jmp_addr, pair);
+    flush_icache_range(jmp_addr, jmp_addr + 8);
 }
 
 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
@@ -1388,10 +1405,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 #endif
         /* consistency for USE_DIRECT_JUMP */
         tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
+        /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
+           write can be used to patch the target address. */
+        if ((uintptr_t)s->code_ptr & 7) {
+            tcg_out32(s, NOP);
+        }
         s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
         /* actual branch destination will be patched by
-           aarch64_tb_set_jmp_target later, beware retranslation. */
-        tcg_out_goto_noaddr(s);
+           aarch64_tb_set_jmp_target later. */
+        tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
+        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
+        tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
         s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
         break;
 
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH v4 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal)
  2017-06-30 14:36 [Qemu-devel] [PATCH v4 1/3] tcg/aarch64: Introduce and use long branch to register Pranith Kumar
  2017-06-30 14:36 ` [Qemu-devel] [PATCH v4 2/3] tcg/aarch64: Use ADRP+ADD to compute target address Pranith Kumar
@ 2017-06-30 14:36 ` Pranith Kumar
  2017-07-03 21:14   ` Richard Henderson
  1 sibling, 1 reply; 5+ messages in thread
From: Pranith Kumar @ 2017-06-30 14:36 UTC (permalink / raw)
  To: alex.bennee; +Cc: qemu-devel, rth

This patch enables the indirect jump path using an LDR (literal)
instruction. It will be interesting to test and see which performs
better among the two paths.

CC: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 tcg/aarch64/tcg-target.inc.c | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index a84422d633..04bc369a92 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -269,6 +269,8 @@ typedef enum {
     I3207_BLR       = 0xd63f0000,
     I3207_RET       = 0xd65f0000,
 
+    /* Load literal for loading the address at pc-relative offset */
+    I3305_LDR       = 0x58000000,
     /* Load/store register.  Described here as 3.3.12, but the helper
        that emits them can transform to 3.3.10 or 3.3.13.  */
     I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
@@ -389,6 +391,11 @@ static inline uint32_t tcg_in32(TCGContext *s)
 #define tcg_out_insn(S, FMT, OP, ...) \
     glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
 
+static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
+{
+    tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
+}
+
 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
                               TCGReg rt, int imm19)
 {
@@ -864,6 +871,8 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
     }
 }
 
+#ifdef USE_DIRECT_JUMP
+
 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
     tcg_insn_unit i1, i2;
@@ -889,6 +898,8 @@ void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
     flush_icache_range(jmp_addr, jmp_addr + 8);
 }
 
+#endif
+
 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
 {
     if (!l->has_value) {
@@ -1400,21 +1411,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_goto_tb:
-#ifndef USE_DIRECT_JUMP
-#error "USE_DIRECT_JUMP required for aarch64"
-#endif
-        /* consistency for USE_DIRECT_JUMP */
-        tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
-        /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
-           write can be used to patch the target address. */
-        if ((uintptr_t)s->code_ptr & 7) {
-            tcg_out32(s, NOP);
+        if (s->tb_jmp_insn_offset != NULL) {
+            /* USE_DIRECT_JUMP */
+            /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
+               write can be used to patch the target address. */
+            if ((uintptr_t)s->code_ptr & 7) {
+                tcg_out32(s, NOP);
+            }
+            s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+            /* actual branch destination will be patched by
+               aarch64_tb_set_jmp_target later. */
+            tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
+            tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
+        } else {
+            /* !USE_DIRECT_JUMP */
+            tcg_debug_assert(s->tb_jmp_target_addr != NULL);
+            intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
+            tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
         }
-        s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
-        /* actual branch destination will be patched by
-           aarch64_tb_set_jmp_target later. */
-        tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
-        tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
         tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
         s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
         break;
-- 
2.13.0

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [Qemu-devel] [PATCH v4 2/3] tcg/aarch64: Use ADRP+ADD to compute target address
  2017-06-30 14:36 ` [Qemu-devel] [PATCH v4 2/3] tcg/aarch64: Use ADRP+ADD to compute target address Pranith Kumar
@ 2017-06-30 16:08   ` Richard Henderson
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2017-06-30 16:08 UTC (permalink / raw)
  To: Pranith Kumar, alex.bennee; +Cc: qemu-devel

On 06/30/2017 07:36 AM, Pranith Kumar wrote:
> We use ADRP+ADD to compute the target address for goto_tb. This patch
> introduces the NOP instruction which is used to align the above
> instruction pair so that we can use one atomic instruction to patch
> the destination offsets.
> 
> CC: Richard Henderson<rth@twiddle.net>
> CC: Alex Bennée<alex.bennee@linaro.org>
> Signed-off-by: Pranith Kumar<bobby.prani@gmail.com>
> ---
>   accel/tcg/translate-all.c    |  2 +-
>   tcg/aarch64/tcg-target.inc.c | 36 ++++++++++++++++++++++++++++++------
>   2 files changed, 31 insertions(+), 7 deletions(-)

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Qemu-devel] [PATCH v4 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal)
  2017-06-30 14:36 ` [Qemu-devel] [PATCH v4 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal) Pranith Kumar
@ 2017-07-03 21:14   ` Richard Henderson
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2017-07-03 21:14 UTC (permalink / raw)
  To: Pranith Kumar, alex.bennee; +Cc: qemu-devel

On 06/30/2017 07:36 AM, Pranith Kumar wrote:
> This patch enables the indirect jump path using an LDR (literal)
> instruction. It will be interesting to test and see which performs
> better among the two paths.
> 
> CC: Alex Bennée <alex.bennee@linaro.org>
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>

Applied all to tcg-next.

Bonus points for converting USE_DIRECT_JUMP to a run-time flag, perhaps 
controllable by a -d bit.  That would further enable easy benchmarking of the 
two paths.


r~

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2017-07-03 21:14 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-06-30 14:36 [Qemu-devel] [PATCH v4 1/3] tcg/aarch64: Introduce and use long branch to register Pranith Kumar
2017-06-30 14:36 ` [Qemu-devel] [PATCH v4 2/3] tcg/aarch64: Use ADRP+ADD to compute target address Pranith Kumar
2017-06-30 16:08   ` Richard Henderson
2017-06-30 14:36 ` [Qemu-devel] [PATCH v4 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal) Pranith Kumar
2017-07-03 21:14   ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).