* [Qemu-devel] [PULL 1/4] tcg/arm: Fix double-word comparisons
2018-01-12 21:06 [Qemu-devel] [PULL 0/4] TCG queued patches Richard Henderson
@ 2018-01-12 21:06 ` Richard Henderson
2018-01-12 21:06 ` [Qemu-devel] [PULL 2/4] tcg/arm: Support tlb offsets larger than 64k Richard Henderson
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2018-01-12 21:06 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, aurelien, Richard Henderson
From: Richard Henderson <rth@twiddle.net>
The code sequence we were generating was only good for unsigned
comparisons. For signed comparisons, use the sequence from gcc.
Fixes booting of ppc64 firmware, with a patch changing the code
sequence for ppc comparisons.
Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/arm/tcg-target.inc.c | 112 +++++++++++++++++++++++++++++++++--------------
1 file changed, 80 insertions(+), 32 deletions(-)
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 98a12535a5..b9890c8bd8 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -239,10 +239,11 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
}
}
-#define TCG_CT_CONST_ARM 0x100
-#define TCG_CT_CONST_INV 0x200
-#define TCG_CT_CONST_NEG 0x400
-#define TCG_CT_CONST_ZERO 0x800
+#define TCG_CT_CONST_ARM 0x0100
+#define TCG_CT_CONST_INV 0x0200
+#define TCG_CT_CONST_NEG 0x0400
+#define TCG_CT_CONST_INVNEG 0x0800
+#define TCG_CT_CONST_ZERO 0x1000
/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
@@ -258,6 +259,9 @@ static const char *target_parse_constraint(TCGArgConstraint *ct,
case 'N': /* The gcc constraint letter is L, already used here. */
ct->ct |= TCG_CT_CONST_NEG;
break;
+ case 'M':
+ ct->ct |= TCG_CT_CONST_INVNEG;
+ break;
case 'Z':
ct->ct |= TCG_CT_CONST_ZERO;
break;
@@ -351,8 +355,7 @@ static inline int check_fit_imm(uint32_t imm)
static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
const TCGArgConstraint *arg_ct)
{
- int ct;
- ct = arg_ct->ct;
+ int ct = arg_ct->ct;
if (ct & TCG_CT_CONST) {
return 1;
} else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
@@ -361,6 +364,9 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
return 1;
} else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
return 1;
+ } else if ((ct & TCG_CT_CONST_INVNEG)
+ && check_fit_imm(~val) && check_fit_imm(-val)) {
+ return 1;
} else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
return 1;
} else {
@@ -1103,6 +1109,64 @@ static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
}
}
+static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
+ const int *const_args)
+{
+ TCGReg al = args[0];
+ TCGReg ah = args[1];
+ TCGArg bl = args[2];
+ TCGArg bh = args[3];
+ TCGCond cond = args[4];
+ int const_bl = const_args[2];
+ int const_bh = const_args[3];
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ case TCG_COND_NE:
+ case TCG_COND_LTU:
+ case TCG_COND_LEU:
+ case TCG_COND_GTU:
+ case TCG_COND_GEU:
+ /* We perform a conditional comparision. If the high half is
+ equal, then overwrite the flags with the comparison of the
+ low half. The resulting flags cover the whole. */
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, ah, bh, const_bh);
+ tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0, al, bl, const_bl);
+ return cond;
+
+ case TCG_COND_LT:
+ case TCG_COND_GE:
+ /* We perform a double-word subtraction and examine the result.
+ We do not actually need the result of the subtract, so the
+ low part "subtract" is a compare. For the high half we have
+ no choice but to compute into a temporary. */
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, al, bl, const_bl);
+ tcg_out_dat_rIK(s, COND_AL, ARITH_SBC | TO_CPSR, ARITH_ADC | TO_CPSR,
+ TCG_REG_TMP, ah, bh, const_bh);
+ return cond;
+
+ case TCG_COND_LE:
+ case TCG_COND_GT:
+ /* Similar, but with swapped arguments. And of course we must
+ force the immediates into a register. */
+ if (const_bl) {
+ tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP, bl);
+ bl = TCG_REG_TMP;
+ }
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, bl, al, 0);
+ if (const_bh) {
+ tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP, bh);
+ bh = TCG_REG_TMP;
+ }
+ tcg_out_dat_rIK(s, COND_AL, ARITH_SBC | TO_CPSR, ARITH_ADC | TO_CPSR,
+ TCG_REG_TMP, bh, ah, 0);
+ return tcg_swap_cond(cond);
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
#ifdef CONFIG_SOFTMMU
#include "tcg-ldst.inc.c"
@@ -1964,22 +2028,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
arg_label(args[3]));
break;
- case INDEX_op_brcond2_i32:
- /* The resulting conditions are:
- * TCG_COND_EQ --> a0 == a2 && a1 == a3,
- * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3,
- * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3,
- * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
- * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
- * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3,
- */
- tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
- args[1], args[3], const_args[3]);
- tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
- args[0], args[2], const_args[2]);
- tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
- arg_label(args[5]));
- break;
case INDEX_op_setcond_i32:
tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
args[1], args[2], const_args[2]);
@@ -1988,15 +2036,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
ARITH_MOV, args[0], 0, 0);
break;
+
+ case INDEX_op_brcond2_i32:
+ c = tcg_out_cmp2(s, args, const_args);
+ tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
+ break;
case INDEX_op_setcond2_i32:
- /* See brcond2_i32 comment */
- tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
- args[2], args[4], const_args[4]);
- tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
- args[1], args[3], const_args[3]);
- tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
- ARITH_MOV, args[0], 0, 1);
- tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
+ c = tcg_out_cmp2(s, args + 1, const_args + 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
ARITH_MOV, args[0], 0, 0);
break;
@@ -2093,9 +2141,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
static const TCGTargetOpDef sub2
= { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
static const TCGTargetOpDef br2
- = { .args_ct_str = { "r", "r", "rIN", "rIN" } };
+ = { .args_ct_str = { "r", "r", "rIM", "rIM" } };
static const TCGTargetOpDef setc2
- = { .args_ct_str = { "r", "r", "r", "rIN", "rIN" } };
+ = { .args_ct_str = { "r", "r", "r", "rIM", "rIM" } };
switch (op) {
case INDEX_op_goto_ptr:
--
2.14.3
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Qemu-devel] [PULL 2/4] tcg/arm: Support tlb offsets larger than 64k
2018-01-12 21:06 [Qemu-devel] [PULL 0/4] TCG queued patches Richard Henderson
2018-01-12 21:06 ` [Qemu-devel] [PULL 1/4] tcg/arm: Fix double-word comparisons Richard Henderson
@ 2018-01-12 21:06 ` Richard Henderson
2018-01-12 21:06 ` [Qemu-devel] [PULL 3/4] tcg/ppc: " Richard Henderson
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2018-01-12 21:06 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, aurelien
AArch64 with SVE has an offset of 80k to the 8th TLB.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/arm/tcg-target.inc.c | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index b9890c8bd8..4bd465732b 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1261,12 +1261,6 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
/* We're expecting to use an 8-bit immediate and to mask. */
QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
-/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
- Using the offset of the second entry in the last tlb table ensures
- that we can index all of the elements of the first entry. */
-QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
- > 0xffff);
-
/* Load and compare a TLB entry, leaving the flags set. Returns the register
containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
@@ -1279,6 +1273,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+ int mask_off;
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
@@ -1310,16 +1305,25 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
}
- /* We checked that the offset is contained within 16 bits above. */
- if (add_off > 0xfff
- || (use_armv6_instructions && TARGET_LONG_BITS == 64
- && cmp_off > 0xff)) {
+ /* Add portions of the offset until the memory access is in range.
+ * If we plan on using ldrd, reduce to an 8-bit offset; otherwise
+ * we can use a 12-bit offset. */
+ if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+ mask_off = 0xff;
+ } else {
+ mask_off = 0xfff;
+ }
+ while (add_off > mask_off) {
+ int shift = ctz32(cmp_off & ~mask_off) & ~1;
+ int rot = ((32 - shift) << 7) & 0xf00;
+ int addend = cmp_off & (0xff << shift);
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
- (24 << 7) | (cmp_off >> 8));
+ rot | ((cmp_off >> shift) & 0xff));
base = TCG_REG_R2;
- add_off -= cmp_off & 0xff00;
- cmp_off &= 0xff;
+ add_off -= addend;
+ cmp_off -= addend;
}
+
if (!use_armv7_instructions) {
tcg_out_dat_imm(s, COND_AL, ARITH_AND,
TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
--
2.14.3
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Qemu-devel] [PULL 4/4] tcg/ppc: Allow a 32-bit offset to the constant pool
2018-01-12 21:06 [Qemu-devel] [PULL 0/4] TCG queued patches Richard Henderson
` (2 preceding siblings ...)
2018-01-12 21:06 ` [Qemu-devel] [PULL 3/4] tcg/ppc: " Richard Henderson
@ 2018-01-12 21:06 ` Richard Henderson
2018-01-12 21:42 ` [Qemu-devel] [PULL 0/4] TCG queued patches Aurelien Jarno
2018-01-15 10:08 ` Peter Maydell
5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2018-01-12 21:06 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell, aurelien
We recently relaxed the limit of the number of opcodes that can
appear in a TranslationBlock. In certain cases this has resulted
in relocation overflow.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.inc.c | 67 ++++++++++++++++++++++++++++--------------------
1 file changed, 39 insertions(+), 28 deletions(-)
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 74f9b4aa34..86f7de5f7e 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -222,33 +222,6 @@ static inline void tcg_out_bc_noaddr(TCGContext *s, int insn)
tcg_out32(s, insn | retrans);
}
-static void patch_reloc(tcg_insn_unit *code_ptr, int type,
- intptr_t value, intptr_t addend)
-{
- tcg_insn_unit *target;
- tcg_insn_unit old;
-
- value += addend;
- target = (tcg_insn_unit *)value;
-
- switch (type) {
- case R_PPC_REL14:
- reloc_pc14(code_ptr, target);
- break;
- case R_PPC_REL24:
- reloc_pc24(code_ptr, target);
- break;
- case R_PPC_ADDR16:
- assert(value == (int16_t)value);
- old = *code_ptr;
- old = deposit32(old, 0, 16, value);
- *code_ptr = old;
- break;
- default:
- tcg_abort();
- }
-}
-
/* parse target specific constraints */
static const char *target_parse_constraint(TCGArgConstraint *ct,
const char *ct_str, TCGType type)
@@ -552,6 +525,43 @@ static const uint32_t tcg_to_isel[] = {
[TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
};
+static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+ intptr_t value, intptr_t addend)
+{
+ tcg_insn_unit *target;
+ tcg_insn_unit old;
+
+ value += addend;
+ target = (tcg_insn_unit *)value;
+
+ switch (type) {
+ case R_PPC_REL14:
+ reloc_pc14(code_ptr, target);
+ break;
+ case R_PPC_REL24:
+ reloc_pc24(code_ptr, target);
+ break;
+ case R_PPC_ADDR16:
+ /* We are abusing this relocation type. This points to a pair
+ of insns, addis + load. If the displacement is small, we
+ can nop out the addis. */
+ if (value == (int16_t)value) {
+ code_ptr[0] = NOP;
+ old = deposit32(code_ptr[1], 0, 16, value);
+ code_ptr[1] = deposit32(old, 16, 5, TCG_REG_TB);
+ } else {
+ int16_t lo = value;
+ int hi = value - lo;
+ assert(hi + lo == value);
+ code_ptr[0] = deposit32(code_ptr[0], 0, 16, hi >> 16);
+ code_ptr[1] = deposit32(code_ptr[1], 0, 16, lo);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
TCGReg base, tcg_target_long offset);
@@ -690,7 +700,8 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
if (!in_prologue && USE_REG_TB) {
new_pool_label(s, arg, R_PPC_ADDR16, s->code_ptr,
-(intptr_t)s->code_gen_ptr);
- tcg_out32(s, LD | TAI(ret, TCG_REG_TB, 0));
+ tcg_out32(s, ADDIS | TAI(ret, TCG_REG_TB, 0));
+ tcg_out32(s, LD | TAI(ret, ret, 0));
return;
}
--
2.14.3
^ permalink raw reply related [flat|nested] 7+ messages in thread