* [PATCH for-8.2 v2 0/7] tcg/ppc: Support power10 prefixed instructions
@ 2023-08-08 3:02 Richard Henderson
2023-08-08 3:02 ` [PATCH v2 1/7] tcg/ppc: Untabify tcg-target.c.inc Richard Henderson
` (6 more replies)
0 siblings, 7 replies; 16+ messages in thread
From: Richard Henderson @ 2023-08-08 3:02 UTC (permalink / raw)
To: qemu-devel; +Cc: npiggin, jniethe5, qemu-ppc
Emit one 64-bit instruction for large constants and pc-relatives.
With pc-relative addressing, we don't need REG_TB, which means we
can re-enable direct branching for goto_tb.
Changes for v2:
* Merged Nick's adjustments for goto_tb. Only patch B/NOP,
falling through to PLD for indirect branch; drop PLA option.
* Fix sx typo in patch 3 (jordan).
r~
Richard Henderson (7):
tcg/ppc: Untabify tcg-target.c.inc
tcg/ppc: Use PADDI in tcg_out_movi
tcg/ppc: Use prefixed instructions in tcg_out_mem_long
tcg/ppc: Use PLD in tcg_out_movi for constant pool
tcg/ppc: Use prefixed instructions in tcg_out_dupi_vec
tcg/ppc: Disable USE_REG_TB for Power v3.1
tcg/ppc: Use prefixed instructions for tcg_out_goto_tb
tcg/ppc/tcg-target.c.inc | 192 +++++++++++++++++++++++++++++++++++----
1 file changed, 176 insertions(+), 16 deletions(-)
--
2.34.1
^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH v2 1/7] tcg/ppc: Untabify tcg-target.c.inc
2023-08-08 3:02 [PATCH for-8.2 v2 0/7] tcg/ppc: Support power10 prefixed instructions Richard Henderson
@ 2023-08-08 3:02 ` Richard Henderson
2023-08-09 8:55 ` Nicholas Piggin
2023-08-08 3:02 ` [PATCH v2 2/7] tcg/ppc: Use PADDI in tcg_out_movi Richard Henderson
` (5 subsequent siblings)
6 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2023-08-08 3:02 UTC (permalink / raw)
To: qemu-devel; +Cc: npiggin, jniethe5, qemu-ppc
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 511e14b180..642d0fd128 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -221,7 +221,7 @@ static inline bool in_range_b(tcg_target_long target)
}
static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
- const tcg_insn_unit *target)
+ const tcg_insn_unit *target)
{
ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
tcg_debug_assert(in_range_b(disp));
@@ -241,7 +241,7 @@ static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
}
static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
- const tcg_insn_unit *target)
+ const tcg_insn_unit *target)
{
ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
tcg_debug_assert(disp == (int16_t) disp);
@@ -3587,7 +3587,7 @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
tcgv_vec_arg(t1), tcgv_vec_arg(t2));
vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
tcgv_vec_arg(v0), tcgv_vec_arg(t1));
- break;
+ break;
case MO_32:
tcg_debug_assert(!have_isa_2_07);
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 2/7] tcg/ppc: Use PADDI in tcg_out_movi
2023-08-08 3:02 [PATCH for-8.2 v2 0/7] tcg/ppc: Support power10 prefixed instructions Richard Henderson
2023-08-08 3:02 ` [PATCH v2 1/7] tcg/ppc: Untabify tcg-target.c.inc Richard Henderson
@ 2023-08-08 3:02 ` Richard Henderson
2023-08-09 9:03 ` Nicholas Piggin
2023-08-08 3:02 ` [PATCH v2 3/7] tcg/ppc: Use prefixed instructions in tcg_out_mem_long Richard Henderson
` (4 subsequent siblings)
6 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2023-08-08 3:02 UTC (permalink / raw)
To: qemu-devel; +Cc: npiggin, jniethe5, qemu-ppc
PADDI can load 34-bit immediates and 34-bit pc-relative addresses.
Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 51 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 51 insertions(+)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 642d0fd128..2141c0bc78 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -707,6 +707,38 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
return true;
}
+/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
+static bool tcg_out_need_prefix_align(TCGContext *s)
+{
+ return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
+}
+
+static void tcg_out_prefix_align(TCGContext *s)
+{
+ if (tcg_out_need_prefix_align(s)) {
+ tcg_out32(s, NOP);
+ }
+}
+
+static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
+{
+ return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
+}
+
+/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
+static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
+ unsigned ra, tcg_target_long imm, bool r)
+{
+ tcg_insn_unit p, i;
+
+ p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
+ i = opc | TAI(rt, ra, imm);
+
+ tcg_out_prefix_align(s);
+ tcg_out32(s, p);
+ tcg_out32(s, i);
+}
+
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
TCGReg base, tcg_target_long offset);
@@ -992,6 +1024,25 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
return;
}
+ /*
+ * Load values up to 34 bits, and pc-relative addresses,
+ * with one prefixed insn.
+ */
+ if (have_isa_3_10) {
+ if (arg == sextract64(arg, 0, 34)) {
+ /* pli ret,value = paddi ret,0,value,0 */
+ tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
+ return;
+ }
+
+ tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
+ if (tmp == sextract64(tmp, 0, 34)) {
+ /* pla ret,value = paddi ret,0,value,1 */
+ tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
+ return;
+ }
+ }
+
/* Load 32-bit immediates with two insns. Note that we've already
eliminated bare ADDIS, so we know both insns are required. */
if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 3/7] tcg/ppc: Use prefixed instructions in tcg_out_mem_long
2023-08-08 3:02 [PATCH for-8.2 v2 0/7] tcg/ppc: Support power10 prefixed instructions Richard Henderson
2023-08-08 3:02 ` [PATCH v2 1/7] tcg/ppc: Untabify tcg-target.c.inc Richard Henderson
2023-08-08 3:02 ` [PATCH v2 2/7] tcg/ppc: Use PADDI in tcg_out_movi Richard Henderson
@ 2023-08-08 3:02 ` Richard Henderson
2023-08-09 11:00 ` Nicholas Piggin
2023-08-08 3:02 ` [PATCH v2 4/7] tcg/ppc: Use PLD in tcg_out_movi for constant pool Richard Henderson
` (3 subsequent siblings)
6 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2023-08-08 3:02 UTC (permalink / raw)
To: qemu-devel; +Cc: npiggin, jniethe5, qemu-ppc
When the offset is out of range of the non-prefixed insn, but
fits the 34-bit immediate of the prefixed insn, use that.
Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 66 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 66 insertions(+)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 2141c0bc78..61ae9d8ab7 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -323,6 +323,15 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
#define STDX XO31(149)
#define STQ XO62( 2)
+#define PLWA OPCD( 41)
+#define PLD OPCD( 57)
+#define PLXSD OPCD( 42)
+#define PLXV OPCD(25 * 2 + 1) /* force tx=1 */
+
+#define PSTD OPCD( 61)
+#define PSTXSD OPCD( 46)
+#define PSTXV OPCD(27 * 2 + 1) /* force sx=1 */
+
#define ADDIC OPCD( 12)
#define ADDI OPCD( 14)
#define ADDIS OPCD( 15)
@@ -725,6 +734,20 @@ static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
}
+/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
+static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
+ unsigned ra, tcg_target_long imm, bool r)
+{
+ tcg_insn_unit p, i;
+
+ p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
+ i = opc | TAI(rt, ra, imm);
+
+ tcg_out_prefix_align(s);
+ tcg_out32(s, p);
+ tcg_out32(s, i);
+}
+
/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
unsigned ra, tcg_target_long imm, bool r)
@@ -1368,6 +1391,49 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
break;
}
+ /* For unaligned or large offsets, use the prefixed form. */
+ if (have_isa_3_10
+ && (offset != (int16_t)offset || (offset & align))
+ && offset == sextract64(offset, 0, 34)) {
+ /*
+ * Note that the MLS:D insns retain their un-prefixed opcode,
+ * while the 8LS:D insns use a different opcode space.
+ */
+ switch (opi) {
+ case LBZ:
+ case LHZ:
+ case LHA:
+ case LWZ:
+ case STB:
+ case STH:
+ case STW:
+ case ADDI:
+ tcg_out_mls_d(s, opi, rt, base, offset, 0);
+ return;
+ case LWA:
+ tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
+ return;
+ case LD:
+ tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
+ return;
+ case STD:
+ tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
+ return;
+ case LXSD:
+ tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
+ return;
+ case STXSD:
+ tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
+ return;
+ case LXV:
+ tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
+ return;
+ case STXV:
+ tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
+ return;
+ }
+ }
+
/* For unaligned, or very large offsets, use the indexed form. */
if (offset & align || offset != (int32_t)offset || opi == 0) {
if (rs == base) {
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 4/7] tcg/ppc: Use PLD in tcg_out_movi for constant pool
2023-08-08 3:02 [PATCH for-8.2 v2 0/7] tcg/ppc: Support power10 prefixed instructions Richard Henderson
` (2 preceding siblings ...)
2023-08-08 3:02 ` [PATCH v2 3/7] tcg/ppc: Use prefixed instructions in tcg_out_mem_long Richard Henderson
@ 2023-08-08 3:02 ` Richard Henderson
2023-08-09 11:20 ` Nicholas Piggin
2023-08-08 3:02 ` [PATCH v2 5/7] tcg/ppc: Use prefixed instructions in tcg_out_dupi_vec Richard Henderson
` (2 subsequent siblings)
6 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2023-08-08 3:02 UTC (permalink / raw)
To: qemu-devel; +Cc: npiggin, jniethe5, qemu-ppc
The prefixed instruction has a pc-relative form to use here.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 24 ++++++++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 61ae9d8ab7..b3b2e9874d 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -101,6 +101,10 @@
#define ALL_GENERAL_REGS 0xffffffffu
#define ALL_VECTOR_REGS 0xffffffff00000000ull
+#ifndef R_PPC64_PCREL34
+#define R_PPC64_PCREL34 132
+#endif
+
#define have_isel (cpuinfo & CPUINFO_ISEL)
#ifndef CONFIG_SOFTMMU
@@ -260,6 +264,19 @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
return false;
}
+static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+ const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+ ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
+
+ if (disp == sextract64(disp, 0, 34)) {
+ src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
+ src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
+ return true;
+ }
+ return false;
+}
+
/* test if a constant matches the constraint */
static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
{
@@ -684,6 +701,8 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
return reloc_pc14(code_ptr, target);
case R_PPC_REL24:
return reloc_pc24(code_ptr, target);
+ case R_PPC64_PCREL34:
+ return reloc_pc34(code_ptr, target);
case R_PPC_ADDR16:
/*
* We are (slightly) abusing this relocation type. In particular,
@@ -1111,6 +1130,11 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
}
/* Use the constant pool, if possible. */
+ if (have_isa_3_10) {
+ tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
+ new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
+ return;
+ }
if (!in_prologue && USE_REG_TB) {
new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
tcg_tbrel_diff(s, NULL));
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 5/7] tcg/ppc: Use prefixed instructions in tcg_out_dupi_vec
2023-08-08 3:02 [PATCH for-8.2 v2 0/7] tcg/ppc: Support power10 prefixed instructions Richard Henderson
` (3 preceding siblings ...)
2023-08-08 3:02 ` [PATCH v2 4/7] tcg/ppc: Use PLD in tcg_out_movi for constant pool Richard Henderson
@ 2023-08-08 3:02 ` Richard Henderson
2023-08-08 3:02 ` [PATCH v2 6/7] tcg/ppc: Disable USE_REG_TB for Power v3.1 Richard Henderson
2023-08-08 3:02 ` [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb Richard Henderson
6 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2023-08-08 3:02 UTC (permalink / raw)
To: qemu-devel; +Cc: npiggin, jniethe5, qemu-ppc
The prefixed instructions have a pc-relative form to use here.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index b3b2e9874d..01ca5c9f39 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -1195,6 +1195,18 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
/*
* Otherwise we must load the value from the constant pool.
*/
+
+ if (have_isa_3_10) {
+ if (type == TCG_TYPE_V64) {
+ tcg_out_8ls_d(s, PLXSD, ret & 31, 0, 0, 1);
+ new_pool_label(s, val, R_PPC64_PCREL34, s->code_ptr - 2, 0);
+ } else {
+ tcg_out_8ls_d(s, PLXV, ret & 31, 0, 0, 1);
+ new_pool_l2(s, R_PPC64_PCREL34, s->code_ptr - 2, 0, val, val);
+ }
+ return;
+ }
+
if (USE_REG_TB) {
rel = R_PPC_ADDR16;
add = tcg_tbrel_diff(s, NULL);
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 6/7] tcg/ppc: Disable USE_REG_TB for Power v3.1
2023-08-08 3:02 [PATCH for-8.2 v2 0/7] tcg/ppc: Support power10 prefixed instructions Richard Henderson
` (4 preceding siblings ...)
2023-08-08 3:02 ` [PATCH v2 5/7] tcg/ppc: Use prefixed instructions in tcg_out_dupi_vec Richard Henderson
@ 2023-08-08 3:02 ` Richard Henderson
2023-08-09 11:21 ` Nicholas Piggin
2023-08-08 3:02 ` [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb Richard Henderson
6 siblings, 1 reply; 16+ messages in thread
From: Richard Henderson @ 2023-08-08 3:02 UTC (permalink / raw)
To: qemu-devel; +Cc: npiggin, jniethe5, qemu-ppc
With Power v3.1, we have pc-relative addressing and so
do not require a register holding the current TB.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 01ca5c9f39..63fe4ef995 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -83,7 +83,7 @@
#define TCG_VEC_TMP2 TCG_REG_V1
#define TCG_REG_TB TCG_REG_R31
-#define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
+#define USE_REG_TB (TCG_TARGET_REG_BITS == 64 && !have_isa_3_10)
/* Shorthand for size of a pointer. Avoid promotion to unsigned. */
#define SZP ((int)sizeof(void *))
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb
2023-08-08 3:02 [PATCH for-8.2 v2 0/7] tcg/ppc: Support power10 prefixed instructions Richard Henderson
` (5 preceding siblings ...)
2023-08-08 3:02 ` [PATCH v2 6/7] tcg/ppc: Disable USE_REG_TB for Power v3.1 Richard Henderson
@ 2023-08-08 3:02 ` Richard Henderson
2023-08-09 2:56 ` Jordan Niethe
2023-08-09 11:24 ` Nicholas Piggin
6 siblings, 2 replies; 16+ messages in thread
From: Richard Henderson @ 2023-08-08 3:02 UTC (permalink / raw)
To: qemu-devel; +Cc: npiggin, jniethe5, qemu-ppc
When a direct branch is out of range, we fall through to an indirect
branch whose destination is loaded from the TranslationBlock with a
pc-relative PLD.
This means the patch affects exactly one instruction: B or NOP.
Which means we can update and execute the patch atomically.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.c.inc | 31 +++++++++++++++++++------------
1 file changed, 19 insertions(+), 12 deletions(-)
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 63fe4ef995..b686a68247 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -2646,31 +2646,38 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
uintptr_t ptr = get_jmp_target_addr(s, which);
if (USE_REG_TB) {
+ /*
+ * With REG_TB, we must always use indirect branching,
+ * so that the branch destination and TCG_REG_TB match.
+ */
ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
-
- /* TODO: Use direct branches when possible. */
- set_jmp_insn_offset(s, which);
tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
-
tcg_out32(s, BCCTR | BO_ALWAYS);
/* For the unlinked case, need to reset TCG_REG_TB. */
set_jmp_reset_offset(s, which);
tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
-tcg_current_code_size(s));
- } else {
- /* Direct branch will be patched by tb_target_set_jmp_target. */
- set_jmp_insn_offset(s, which);
- tcg_out32(s, NOP);
+ return;
+ }
- /* When branch is out of range, fall through to indirect. */
+ /* Direct branch will be patched by tb_target_set_jmp_target. */
+ set_jmp_insn_offset(s, which);
+ tcg_out32(s, NOP);
+
+ /* When branch is out of range, fall through to indirect. */
+ if (have_isa_3_10) {
+ ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
+ tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
+ } else {
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
- tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
- tcg_out32(s, BCCTR | BO_ALWAYS);
- set_jmp_reset_offset(s, which);
}
+
+ tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+ set_jmp_reset_offset(s, which);
}
void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
--
2.34.1
^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb
2023-08-08 3:02 ` [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb Richard Henderson
@ 2023-08-09 2:56 ` Jordan Niethe
2023-08-09 3:18 ` Richard Henderson
2023-08-09 11:24 ` Nicholas Piggin
1 sibling, 1 reply; 16+ messages in thread
From: Jordan Niethe @ 2023-08-09 2:56 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel, npiggin, qemu-ppc
On Tue, Aug 8, 2023 at 1:02 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> When a direct branch is out of range, we can load the destination for
> the indirect branch using PLA (for 16GB worth of buffer) and PLD from
> the TranslationBlock for everything larger.
>
> This means the patch affects exactly one instruction: B (plus filler),
> PLA or PLD. Which means we can update and execute the patch atomically.
I think the commit message needs to be updated for Nick's changes.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/ppc/tcg-target.c.inc | 31 +++++++++++++++++++------------
> 1 file changed, 19 insertions(+), 12 deletions(-)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 63fe4ef995..b686a68247 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -2646,31 +2646,38 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
> uintptr_t ptr = get_jmp_target_addr(s, which);
>
> if (USE_REG_TB) {
> + /*
> + * With REG_TB, we must always use indirect branching,
> + * so that the branch destination and TCG_REG_TB match.
> + */
> ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
> tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
> -
> - /* TODO: Use direct branches when possible. */
> - set_jmp_insn_offset(s, which);
> tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
> -
> tcg_out32(s, BCCTR | BO_ALWAYS);
>
> /* For the unlinked case, need to reset TCG_REG_TB. */
> set_jmp_reset_offset(s, which);
> tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
> -tcg_current_code_size(s));
> - } else {
> - /* Direct branch will be patched by tb_target_set_jmp_target. */
> - set_jmp_insn_offset(s, which);
> - tcg_out32(s, NOP);
> + return;
> + }
>
> - /* When branch is out of range, fall through to indirect. */
> + /* Direct branch will be patched by tb_target_set_jmp_target. */
> + set_jmp_insn_offset(s, which);
> + tcg_out32(s, NOP);
> +
> + /* When branch is out of range, fall through to indirect. */
> + if (have_isa_3_10) {
> + ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
> + tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
> + } else {
> tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
> tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
> - tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
> - tcg_out32(s, BCCTR | BO_ALWAYS);
> - set_jmp_reset_offset(s, which);
> }
> +
> + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
> + tcg_out32(s, BCCTR | BO_ALWAYS);
> + set_jmp_reset_offset(s, which);
> }
>
> void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
> --
> 2.34.1
>
Thank you for implementing this Richard.
I was able to boot mttcg guests on P9 and P10 hosts.
Tested-by: Jordan Niethe <jniethe5@gmail.com>
Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb
2023-08-09 2:56 ` Jordan Niethe
@ 2023-08-09 3:18 ` Richard Henderson
0 siblings, 0 replies; 16+ messages in thread
From: Richard Henderson @ 2023-08-09 3:18 UTC (permalink / raw)
To: Jordan Niethe; +Cc: qemu-devel, npiggin, qemu-ppc
On 8/8/23 19:56, Jordan Niethe wrote:
> On Tue, Aug 8, 2023 at 1:02 PM Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> When a direct branch is out of range, we can load the destination for
>> the indirect branch using PLA (for 16GB worth of buffer) and PLD from
>> the TranslationBlock for everything larger.
>>
>> This means the patch affects exactly one instruction: B (plus filler),
>> PLA or PLD. Which means we can update and execute the patch atomically.
>
> I think the commit message needs to be updated for Nick's changes.
Whoops, yes.
r~
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 1/7] tcg/ppc: Untabify tcg-target.c.inc
2023-08-08 3:02 ` [PATCH v2 1/7] tcg/ppc: Untabify tcg-target.c.inc Richard Henderson
@ 2023-08-09 8:55 ` Nicholas Piggin
0 siblings, 0 replies; 16+ messages in thread
From: Nicholas Piggin @ 2023-08-09 8:55 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: jniethe5, qemu-ppc
Acked-by: Nicholas Piggin <npiggin@gmail.com>
On Tue Aug 8, 2023 at 1:02 PM AEST, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/ppc/tcg-target.c.inc | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 511e14b180..642d0fd128 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -221,7 +221,7 @@ static inline bool in_range_b(tcg_target_long target)
> }
>
> static uint32_t reloc_pc24_val(const tcg_insn_unit *pc,
> - const tcg_insn_unit *target)
> + const tcg_insn_unit *target)
> {
> ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
> tcg_debug_assert(in_range_b(disp));
> @@ -241,7 +241,7 @@ static bool reloc_pc24(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
> }
>
> static uint16_t reloc_pc14_val(const tcg_insn_unit *pc,
> - const tcg_insn_unit *target)
> + const tcg_insn_unit *target)
> {
> ptrdiff_t disp = tcg_ptr_byte_diff(target, pc);
> tcg_debug_assert(disp == (int16_t) disp);
> @@ -3587,7 +3587,7 @@ static void expand_vec_mul(TCGType type, unsigned vece, TCGv_vec v0,
> tcgv_vec_arg(t1), tcgv_vec_arg(t2));
> vec_gen_3(INDEX_op_ppc_pkum_vec, type, vece, tcgv_vec_arg(v0),
> tcgv_vec_arg(v0), tcgv_vec_arg(t1));
> - break;
> + break;
>
> case MO_32:
> tcg_debug_assert(!have_isa_2_07);
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 2/7] tcg/ppc: Use PADDI in tcg_out_movi
2023-08-08 3:02 ` [PATCH v2 2/7] tcg/ppc: Use PADDI in tcg_out_movi Richard Henderson
@ 2023-08-09 9:03 ` Nicholas Piggin
0 siblings, 0 replies; 16+ messages in thread
From: Nicholas Piggin @ 2023-08-09 9:03 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: jniethe5, qemu-ppc
On Tue Aug 8, 2023 at 1:02 PM AEST, Richard Henderson wrote:
> PADDI can load 34-bit immediates and 34-bit pc-relative addresses.
>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/ppc/tcg-target.c.inc | 51 ++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 51 insertions(+)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 642d0fd128..2141c0bc78 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -707,6 +707,38 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
> return true;
> }
>
> +/* Ensure that the prefixed instruction does not cross a 64-byte boundary. */
> +static bool tcg_out_need_prefix_align(TCGContext *s)
> +{
> + return ((uintptr_t)s->code_ptr & 0x3f) == 0x3c;
> +}
> +
> +static void tcg_out_prefix_align(TCGContext *s)
> +{
> + if (tcg_out_need_prefix_align(s)) {
> + tcg_out32(s, NOP);
> + }
> +}
> +
> +static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
> +{
> + return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
> +}
> +
> +/* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
> +static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
> + unsigned ra, tcg_target_long imm, bool r)
> +{
> + tcg_insn_unit p, i;
> +
> + p = OPCD(1) | (2 << 24) | (r << 20) | ((imm >> 16) & 0x3ffff);
> + i = opc | TAI(rt, ra, imm);
> +
> + tcg_out_prefix_align(s);
> + tcg_out32(s, p);
> + tcg_out32(s, i);
> +}
> +
> static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
> TCGReg base, tcg_target_long offset);
>
> @@ -992,6 +1024,25 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
> return;
> }
>
> + /*
> + * Load values up to 34 bits, and pc-relative addresses,
> + * with one prefixed insn.
> + */
> + if (have_isa_3_10) {
> + if (arg == sextract64(arg, 0, 34)) {
> + /* pli ret,value = paddi ret,0,value,0 */
> + tcg_out_mls_d(s, ADDI, ret, 0, arg, 0);
> + return;
> + }
> +
> + tmp = tcg_pcrel_diff_for_prefix(s, (void *)arg);
> + if (tmp == sextract64(tmp, 0, 34)) {
> + /* pla ret,value = paddi ret,0,value,1 */
> + tcg_out_mls_d(s, ADDI, ret, 0, tmp, 1);
> + return;
> + }
> + }
> +
> /* Load 32-bit immediates with two insns. Note that we've already
> eliminated bare ADDIS, so we know both insns are required. */
> if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 3/7] tcg/ppc: Use prefixed instructions in tcg_out_mem_long
2023-08-08 3:02 ` [PATCH v2 3/7] tcg/ppc: Use prefixed instructions in tcg_out_mem_long Richard Henderson
@ 2023-08-09 11:00 ` Nicholas Piggin
0 siblings, 0 replies; 16+ messages in thread
From: Nicholas Piggin @ 2023-08-09 11:00 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: jniethe5, qemu-ppc
On Tue Aug 8, 2023 at 1:02 PM AEST, Richard Henderson wrote:
> When the offset is out of range of the non-prefixed insn, but
> fits the 34-bit immediate of the prefixed insn, use that.
>
The switch will fall through in some cases (at least opi == 0).
Should it have a default: break; to make that obvious?
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/ppc/tcg-target.c.inc | 66 ++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 66 insertions(+)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 2141c0bc78..61ae9d8ab7 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -323,6 +323,15 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
> #define STDX XO31(149)
> #define STQ XO62( 2)
>
> +#define PLWA OPCD( 41)
> +#define PLD OPCD( 57)
> +#define PLXSD OPCD( 42)
> +#define PLXV OPCD(25 * 2 + 1) /* force tx=1 */
> +
> +#define PSTD OPCD( 61)
> +#define PSTXSD OPCD( 46)
> +#define PSTXV OPCD(27 * 2 + 1) /* force sx=1 */
> +
> #define ADDIC OPCD( 12)
> #define ADDI OPCD( 14)
> #define ADDIS OPCD( 15)
> @@ -725,6 +734,20 @@ static ptrdiff_t tcg_pcrel_diff_for_prefix(TCGContext *s, const void *target)
> return tcg_pcrel_diff(s, target) - (tcg_out_need_prefix_align(s) ? 4 : 0);
> }
>
> +/* Output Type 00 Prefix - 8-Byte Load/Store Form (8LS:D) */
> +static void tcg_out_8ls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
> + unsigned ra, tcg_target_long imm, bool r)
> +{
> + tcg_insn_unit p, i;
> +
> + p = OPCD(1) | (r << 20) | ((imm >> 16) & 0x3ffff);
> + i = opc | TAI(rt, ra, imm);
> +
> + tcg_out_prefix_align(s);
> + tcg_out32(s, p);
> + tcg_out32(s, i);
> +}
> +
> /* Output Type 10 Prefix - Modified Load/Store Form (MLS:D) */
> static void tcg_out_mls_d(TCGContext *s, tcg_insn_unit opc, unsigned rt,
> unsigned ra, tcg_target_long imm, bool r)
> @@ -1368,6 +1391,49 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
> break;
> }
>
> + /* For unaligned or large offsets, use the prefixed form. */
> + if (have_isa_3_10
> + && (offset != (int16_t)offset || (offset & align))
> + && offset == sextract64(offset, 0, 34)) {
> + /*
> + * Note that the MLS:D insns retain their un-prefixed opcode,
> + * while the 8LS:D insns use a different opcode space.
> + */
> + switch (opi) {
> + case LBZ:
> + case LHZ:
> + case LHA:
> + case LWZ:
> + case STB:
> + case STH:
> + case STW:
> + case ADDI:
> + tcg_out_mls_d(s, opi, rt, base, offset, 0);
> + return;
> + case LWA:
> + tcg_out_8ls_d(s, PLWA, rt, base, offset, 0);
> + return;
> + case LD:
> + tcg_out_8ls_d(s, PLD, rt, base, offset, 0);
> + return;
> + case STD:
> + tcg_out_8ls_d(s, PSTD, rt, base, offset, 0);
> + return;
> + case LXSD:
> + tcg_out_8ls_d(s, PLXSD, rt & 31, base, offset, 0);
> + return;
> + case STXSD:
> + tcg_out_8ls_d(s, PSTXSD, rt & 31, base, offset, 0);
> + return;
> + case LXV:
> + tcg_out_8ls_d(s, PLXV, rt & 31, base, offset, 0);
> + return;
> + case STXV:
> + tcg_out_8ls_d(s, PSTXV, rt & 31, base, offset, 0);
> + return;
> + }
> + }
> +
> /* For unaligned, or very large offsets, use the indexed form. */
> if (offset & align || offset != (int32_t)offset || opi == 0) {
> if (rs == base) {
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 4/7] tcg/ppc: Use PLD in tcg_out_movi for constant pool
2023-08-08 3:02 ` [PATCH v2 4/7] tcg/ppc: Use PLD in tcg_out_movi for constant pool Richard Henderson
@ 2023-08-09 11:20 ` Nicholas Piggin
0 siblings, 0 replies; 16+ messages in thread
From: Nicholas Piggin @ 2023-08-09 11:20 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: jniethe5, qemu-ppc
On Tue Aug 8, 2023 at 1:02 PM AEST, Richard Henderson wrote:
> The prefixed instruction has a pc-relative form to use here.
I don't understand this code very well, but going by the existing
relocs it looks okay.
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/ppc/tcg-target.c.inc | 24 ++++++++++++++++++++++++
> 1 file changed, 24 insertions(+)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 61ae9d8ab7..b3b2e9874d 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -101,6 +101,10 @@
> #define ALL_GENERAL_REGS 0xffffffffu
> #define ALL_VECTOR_REGS 0xffffffff00000000ull
>
> +#ifndef R_PPC64_PCREL34
> +#define R_PPC64_PCREL34 132
> +#endif
> +
> #define have_isel (cpuinfo & CPUINFO_ISEL)
>
> #ifndef CONFIG_SOFTMMU
> @@ -260,6 +264,19 @@ static bool reloc_pc14(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
> return false;
> }
>
> +static bool reloc_pc34(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
> +{
> + const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
> + ptrdiff_t disp = tcg_ptr_byte_diff(target, src_rx);
> +
> + if (disp == sextract64(disp, 0, 34)) {
> + src_rw[0] = (src_rw[0] & ~0x3ffff) | ((disp >> 16) & 0x3ffff);
> + src_rw[1] = (src_rw[1] & ~0xffff) | (disp & 0xffff);
> + return true;
> + }
> + return false;
> +}
> +
> /* test if a constant matches the constraint */
> static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
> {
> @@ -684,6 +701,8 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
> return reloc_pc14(code_ptr, target);
> case R_PPC_REL24:
> return reloc_pc24(code_ptr, target);
> + case R_PPC64_PCREL34:
> + return reloc_pc34(code_ptr, target);
> case R_PPC_ADDR16:
> /*
> * We are (slightly) abusing this relocation type. In particular,
> @@ -1111,6 +1130,11 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
> }
>
> /* Use the constant pool, if possible. */
> + if (have_isa_3_10) {
> + tcg_out_8ls_d(s, PLD, ret, 0, 0, 1);
> + new_pool_label(s, arg, R_PPC64_PCREL34, s->code_ptr - 2, 0);
> + return;
> + }
> if (!in_prologue && USE_REG_TB) {
> new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
> tcg_tbrel_diff(s, NULL));
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 6/7] tcg/ppc: Disable USE_REG_TB for Power v3.1
2023-08-08 3:02 ` [PATCH v2 6/7] tcg/ppc: Disable USE_REG_TB for Power v3.1 Richard Henderson
@ 2023-08-09 11:21 ` Nicholas Piggin
0 siblings, 0 replies; 16+ messages in thread
From: Nicholas Piggin @ 2023-08-09 11:21 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: jniethe5, qemu-ppc
On Tue Aug 8, 2023 at 1:02 PM AEST, Richard Henderson wrote:
> With Power v3.1, we have pc-relative addressing and so
> do not require a register holding the current TB.
>
Acked-by: Nicholas Piggin <npiggin@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/ppc/tcg-target.c.inc | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 01ca5c9f39..63fe4ef995 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -83,7 +83,7 @@
> #define TCG_VEC_TMP2 TCG_REG_V1
>
> #define TCG_REG_TB TCG_REG_R31
> -#define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
> +#define USE_REG_TB (TCG_TARGET_REG_BITS == 64 && !have_isa_3_10)
>
> /* Shorthand for size of a pointer. Avoid promotion to unsigned. */
> #define SZP ((int)sizeof(void *))
^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb
2023-08-08 3:02 ` [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb Richard Henderson
2023-08-09 2:56 ` Jordan Niethe
@ 2023-08-09 11:24 ` Nicholas Piggin
1 sibling, 0 replies; 16+ messages in thread
From: Nicholas Piggin @ 2023-08-09 11:24 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: jniethe5, qemu-ppc
On Tue Aug 8, 2023 at 1:02 PM AEST, Richard Henderson wrote:
> When a direct branch is out of range, we can load the destination for
> the indirect branch using PLA (for 16GB worth of buffer) and PLD from
> the TranslationBlock for everything larger.
>
> This means the patch affects exactly one instruction: B (plus filler),
> PLA or PLD. Which means we can update and execute the patch atomically.
>
Aside from the changelog issue that Jordan pointed out,
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/ppc/tcg-target.c.inc | 31 +++++++++++++++++++------------
> 1 file changed, 19 insertions(+), 12 deletions(-)
>
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 63fe4ef995..b686a68247 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -2646,31 +2646,38 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
> uintptr_t ptr = get_jmp_target_addr(s, which);
>
> if (USE_REG_TB) {
> + /*
> + * With REG_TB, we must always use indirect branching,
> + * so that the branch destination and TCG_REG_TB match.
> + */
> ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr);
> tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset);
> -
> - /* TODO: Use direct branches when possible. */
> - set_jmp_insn_offset(s, which);
> tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
> -
> tcg_out32(s, BCCTR | BO_ALWAYS);
>
> /* For the unlinked case, need to reset TCG_REG_TB. */
> set_jmp_reset_offset(s, which);
> tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB,
> -tcg_current_code_size(s));
> - } else {
> - /* Direct branch will be patched by tb_target_set_jmp_target. */
> - set_jmp_insn_offset(s, which);
> - tcg_out32(s, NOP);
> + return;
> + }
>
> - /* When branch is out of range, fall through to indirect. */
> + /* Direct branch will be patched by tb_target_set_jmp_target. */
> + set_jmp_insn_offset(s, which);
> + tcg_out32(s, NOP);
> +
> + /* When branch is out of range, fall through to indirect. */
> + if (have_isa_3_10) {
> + ptrdiff_t offset = tcg_pcrel_diff_for_prefix(s, (void *)ptr);
> + tcg_out_8ls_d(s, PLD, TCG_REG_TMP1, 0, offset, 1);
> + } else {
> tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr);
> tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr);
> - tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
> - tcg_out32(s, BCCTR | BO_ALWAYS);
> - set_jmp_reset_offset(s, which);
> }
> +
> + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
> + tcg_out32(s, BCCTR | BO_ALWAYS);
> + set_jmp_reset_offset(s, which);
> }
>
> void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2023-08-09 11:24 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-08-08 3:02 [PATCH for-8.2 v2 0/7] tcg/ppc: Support power10 prefixed instructions Richard Henderson
2023-08-08 3:02 ` [PATCH v2 1/7] tcg/ppc: Untabify tcg-target.c.inc Richard Henderson
2023-08-09 8:55 ` Nicholas Piggin
2023-08-08 3:02 ` [PATCH v2 2/7] tcg/ppc: Use PADDI in tcg_out_movi Richard Henderson
2023-08-09 9:03 ` Nicholas Piggin
2023-08-08 3:02 ` [PATCH v2 3/7] tcg/ppc: Use prefixed instructions in tcg_out_mem_long Richard Henderson
2023-08-09 11:00 ` Nicholas Piggin
2023-08-08 3:02 ` [PATCH v2 4/7] tcg/ppc: Use PLD in tcg_out_movi for constant pool Richard Henderson
2023-08-09 11:20 ` Nicholas Piggin
2023-08-08 3:02 ` [PATCH v2 5/7] tcg/ppc: Use prefixed instructions in tcg_out_dupi_vec Richard Henderson
2023-08-08 3:02 ` [PATCH v2 6/7] tcg/ppc: Disable USE_REG_TB for Power v3.1 Richard Henderson
2023-08-09 11:21 ` Nicholas Piggin
2023-08-08 3:02 ` [PATCH v2 7/7] tcg/ppc: Use prefixed instructions for tcg_out_goto_tb Richard Henderson
2023-08-09 2:56 ` Jordan Niethe
2023-08-09 3:18 ` Richard Henderson
2023-08-09 11:24 ` Nicholas Piggin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).