qemu-devel.nongnu.org archive mirror
* [Qemu-devel] [PATCH v2 0/4] tcg/arm fixes
@ 2018-01-16  3:45 Richard Henderson
  2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons Richard Henderson
  2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 2/4] tcg/arm: Support tlb offsets larger than 64k Richard Henderson
  0 siblings, 2 replies; 4+ messages in thread
From: Richard Henderson @ 2018-01-16  3:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

Changes since v1:

Patch 1:
  * Use rsb/rsc for gt/le.  Now we always use "rI" as a constraint,
    so we never have to load immediates into registers by hand.

Patch 2:
  * We can't use add_off in the while condition and cmp_off in the
    reduction and still converge; use cmp_off always.  Fixes the sparc64
    regression that Peter noticed.


r~


Richard Henderson (4):
  tcg/arm: Fix double-word comparisons
  tcg/arm: Support tlb offsets larger than 64k
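
A note on the "rI" constraint mentioned under Patch 1: it accepts either a
register or a constant that can be encoded as an ARM data-processing
immediate, that is, an 8-bit value rotated right by an even amount.  The
self-contained C sketch below is illustrative only; it is not the tcg/arm
check_fit_imm() implementation and the function name is invented.  It shows
the shape of the test that such a constraint implies.

#include <stdint.h>

/* Return nonzero if 'imm' can be encoded as imm8 ROR (2*n): some even
   left-rotation of it must fit in 8 bits. */
static int fits_arm_immediate(uint32_t imm)
{
    for (int rot = 0; rot < 32; rot += 2) {
        /* Rotating left by 'rot' undoes an encoding of imm8 ROR rot. */
        uint32_t v = (imm << rot) | (imm >> ((32 - rot) & 31));
        if (v <= 0xff) {
            return 1;
        }
    }
    return 0;
}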


* [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons
  2018-01-16  3:45 [Qemu-devel] [PATCH v2 0/4] tcg/arm fixes Richard Henderson
@ 2018-01-16  3:45 ` Richard Henderson
  2018-01-16 11:33   ` Peter Maydell
  2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 2/4] tcg/arm: Support tlb offsets larger than 64k Richard Henderson
  1 sibling, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2018-01-16  3:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell, Richard Henderson

The code sequence we were generating was only good for unsigned
comparisons.  For signed comparisons, use the sequence from gcc.

Fixes booting of ppc64 firmware, with a patch changing the code
sequence for ppc comparisons.

Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.inc.c | 97 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 65 insertions(+), 32 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 98a12535a5..0ff283d84f 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -239,10 +239,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
     }
 }
 
-#define TCG_CT_CONST_ARM  0x100
-#define TCG_CT_CONST_INV  0x200
-#define TCG_CT_CONST_NEG  0x400
-#define TCG_CT_CONST_ZERO 0x800
+#define TCG_CT_CONST_ARM     0x0100
+#define TCG_CT_CONST_INV     0x0200
+#define TCG_CT_CONST_NEG     0x0400
+#define TCG_CT_CONST_ZERO    0x1000
 
 /* parse target specific constraints */
 static const char *target_parse_constraint(TCGArgConstraint *ct,
@@ -351,8 +351,7 @@ static inline int check_fit_imm(uint32_t imm)
 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                          const TCGArgConstraint *arg_ct)
 {
-    int ct;
-    ct = arg_ct->ct;
+    int ct = arg_ct->ct;
     if (ct & TCG_CT_CONST) {
         return 1;
     } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
@@ -1103,6 +1102,56 @@ static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
     }
 }
 
+static TCGCond tcg_out_cmp2(TCGContext *s, const TCGArg *args,
+                            const int *const_args)
+{
+    TCGReg al = args[0];
+    TCGReg ah = args[1];
+    TCGArg bl = args[2];
+    TCGArg bh = args[3];
+    TCGCond cond = args[4];
+    int const_bl = const_args[2];
+    int const_bh = const_args[3];
+
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+    case TCG_COND_LTU:
+    case TCG_COND_LEU:
+    case TCG_COND_GTU:
+    case TCG_COND_GEU:
+        /* We perform a conditional comparison.  If the high half is
+           equal, then overwrite the flags with the comparison of the
+           low half.  The resulting flags cover the whole.  */
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, ah, bh, const_bh);
+        tcg_out_dat_rI(s, COND_EQ, ARITH_CMP, 0, al, bl, const_bl);
+        return cond;
+
+    case TCG_COND_LT:
+    case TCG_COND_GE:
+        /* We perform a double-word subtraction and examine the result.
+           We do not actually need the result of the subtract, so the
+           low part "subtract" is a compare.  For the high half we have
+           no choice but to compute into a temporary.  */
+        tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0, al, bl, const_bl);
+        tcg_out_dat_rI(s, COND_AL, ARITH_SBC | TO_CPSR,
+                       TCG_REG_TMP, ah, bh, const_bh);
+        return cond;
+
+    case TCG_COND_LE:
+    case TCG_COND_GT:
+        /* Similar, but with swapped arguments, via reversed subtract.  */
+        tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR,
+                       TCG_REG_TMP, al, bl, const_bl);
+        tcg_out_dat_rI(s, COND_AL, ARITH_RSC | TO_CPSR,
+                       TCG_REG_TMP, ah, bh, const_bh);
+        return tcg_swap_cond(cond);
+
+    default:
+        g_assert_not_reached();
+    }
+}
+
 #ifdef CONFIG_SOFTMMU
 #include "tcg-ldst.inc.c"
 
@@ -1964,22 +2013,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
                            arg_label(args[3]));
         break;
-    case INDEX_op_brcond2_i32:
-        /* The resulting conditions are:
-         * TCG_COND_EQ    -->  a0 == a2 && a1 == a3,
-         * TCG_COND_NE    --> (a0 != a2 && a1 == a3) ||  a1 != a3,
-         * TCG_COND_LT(U) --> (a0 <  a2 && a1 == a3) ||  a1 <  a3,
-         * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
-         * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
-         * TCG_COND_GT(U) --> (a0 >  a2 && a1 == a3) ||  a1 >  a3,
-         */
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[1], args[3], const_args[3]);
-        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
-                        args[0], args[2], const_args[2]);
-        tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
-                           arg_label(args[5]));
-        break;
     case INDEX_op_setcond_i32:
         tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
                         args[1], args[2], const_args[2]);
@@ -1988,15 +2021,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
                         ARITH_MOV, args[0], 0, 0);
         break;
+
+    case INDEX_op_brcond2_i32:
+        c = tcg_out_cmp2(s, args, const_args);
+        tcg_out_goto_label(s, tcg_cond_to_arm_cond[c], arg_label(args[5]));
+        break;
     case INDEX_op_setcond2_i32:
-        /* See brcond2_i32 comment */
-        tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
-                        args[2], args[4], const_args[4]);
-        tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
-                        args[1], args[3], const_args[3]);
-        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
-                        ARITH_MOV, args[0], 0, 1);
-        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
+        c = tcg_out_cmp2(s, args + 1, const_args + 1);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
+        tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
                         ARITH_MOV, args[0], 0, 0);
         break;
 
@@ -2093,9 +2126,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     static const TCGTargetOpDef sub2
         = { .args_ct_str = { "r", "r", "rI", "rI", "rIN", "rIK" } };
     static const TCGTargetOpDef br2
-        = { .args_ct_str = { "r", "r", "rIN", "rIN" } };
+        = { .args_ct_str = { "r", "r", "rI", "rI" } };
     static const TCGTargetOpDef setc2
-        = { .args_ct_str = { "r", "r", "r", "rIN", "rIN" } };
+        = { .args_ct_str = { "r", "r", "r", "rI", "rI" } };
 
     switch (op) {
     case INDEX_op_goto_ptr:
-- 
2.14.3
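
A rough illustration of the signed cases above (TCG_COND_LT/GE): the low
halves are compared only to produce a borrow, and the high halves are then
subtracted with a flag-setting SBC whose result is discarded.  The
self-contained C model below is not part of the patch and all names in it
are invented; it mimics the N and V flags and shows that "N != V" after the
high-half subtract is exactly the signed 64-bit less-than.  TCG_COND_LE/GT
reuse the same idea with the operands swapped via RSB/RSC, which is why
tcg_out_cmp2() returns tcg_swap_cond(cond) for them.

#include <stdint.h>
#include <assert.h>

/* Model of "CMP al, bl; SBCS tmp, ah, bh" followed by the LT condition. */
static int lt_s64_via_flags(uint32_t al, uint32_t ah,
                            uint32_t bl, uint32_t bh)
{
    unsigned borrow = al < bl;              /* CMP al, bl: carry = !borrow */
    uint32_t res = ah - bh - borrow;        /* SBCS tmp, ah, bh            */
    unsigned n = res >> 31;                          /* N flag */
    unsigned v = ((ah ^ bh) & (ah ^ res)) >> 31;     /* V flag */
    return n != v;                          /* ARM condition code LT       */
}

int main(void)
{
    int64_t a = -5, b = 3;
    assert(lt_s64_via_flags((uint32_t)a, (uint32_t)((uint64_t)a >> 32),
                            (uint32_t)b, (uint32_t)((uint64_t)b >> 32))
           == (a < b));
    return 0;
}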


* [Qemu-devel] [PATCH v2 2/4] tcg/arm: Support tlb offsets larger than 64k
  2018-01-16  3:45 [Qemu-devel] [PATCH v2 0/4] tcg/arm fixes Richard Henderson
  2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons Richard Henderson
@ 2018-01-16  3:45 ` Richard Henderson
  1 sibling, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2018-01-16  3:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: peter.maydell

AArch64 with SVE has an offset of 80k to the 8th TLB.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target.inc.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 0ff283d84f..8f5d4f208d 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1246,12 +1246,6 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
 /* We're expecting to use an 8-bit immediate and to mask.  */
 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
 
-/* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
-   Using the offset of the second entry in the last tlb table ensures
-   that we can index all of the elements of the first entry.  */
-QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
-                  > 0xffff);
-
 /* Load and compare a TLB entry, leaving the flags set.  Returns the register
    containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
 
@@ -1264,6 +1258,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
          ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
          : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
     int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    int mask_off;
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
@@ -1295,16 +1290,25 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
                         0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
     }
 
-    /* We checked that the offset is contained within 16 bits above.  */
-    if (add_off > 0xfff
-        || (use_armv6_instructions && TARGET_LONG_BITS == 64
-            && cmp_off > 0xff)) {
+    /* Add portions of the offset until the memory access is in range.
+     * If we plan on using ldrd, reduce to an 8-bit offset; otherwise
+     * we can use a 12-bit offset.  */
+    if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
+        mask_off = 0xff;
+    } else {
+        mask_off = 0xfff;
+    }
+    while (cmp_off > mask_off) {
+        int shift = ctz32(cmp_off & ~mask_off) & ~1;
+        int rot = ((32 - shift) << 7) & 0xf00;
+        int addend = cmp_off & (0xff << shift);
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
-                        (24 << 7) | (cmp_off >> 8));
+                        rot | ((cmp_off >> shift) & 0xff));
         base = TCG_REG_R2;
-        add_off -= cmp_off & 0xff00;
-        cmp_off &= 0xff;
+        add_off -= addend;
+        cmp_off -= addend;
     }
+
     if (!use_armv7_instructions) {
         tcg_out_dat_imm(s, COND_AL, ARITH_AND,
                         TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
-- 
2.14.3
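
To make the new reduction loop concrete: an ARM ADD immediate is an 8-bit
value rotated right by an even amount, so an over-large cmp_off is peeled
off one encodable chunk at a time, and add_off is reduced by the same
amount so the two stay in step.  Because the loop both tests and reduces
cmp_off, every iteration clears cmp_off's lowest chunk above the mask, so
it must terminate (the convergence point from the cover letter).  The
standalone C walk-through below is illustrative only; the offsets are
hypothetical (roughly the 80k case from the commit message) and it is not
QEMU code.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    int cmp_off  = 0x14010;   /* hypothetical compare offset (~80k)        */
    int add_off  = 0x14018;   /* hypothetical addend offset, kept in sync  */
    int mask_off = 0xff;      /* ldrd case: an 8-bit offset must remain    */

    while (cmp_off > mask_off) {
        int shift  = __builtin_ctz(cmp_off & ~mask_off) & ~1; /* even      */
        int rot    = ((32 - shift) << 7) & 0xf00;  /* encoded rotate field */
        int addend = cmp_off & (0xff << shift);    /* chunk peeled off     */
        printf("ADD r2, base, #0x%x   (imm8=0x%x, rot=0x%x)\n",
               addend, (cmp_off >> shift) & 0xff, rot);
        add_off -= addend;
        cmp_off -= addend;
    }
    printf("remaining: cmp_off=0x%x add_off=0x%x\n", cmp_off, add_off);
    return 0;
}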


* Re: [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons
  2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons Richard Henderson
@ 2018-01-16 11:33   ` Peter Maydell
  0 siblings, 0 replies; 4+ messages in thread
From: Peter Maydell @ 2018-01-16 11:33 UTC (permalink / raw)
  To: Richard Henderson; +Cc: QEMU Developers, Richard Henderson

On 16 January 2018 at 03:45, Richard Henderson
<richard.henderson@linaro.org> wrote:
> The code sequence we were generating was only good for unsigned
> comparisons.  For signed comparisons, use the sequence from gcc.
>
> Fixes booting of ppc64 firmware, with a patch changing the code
> sequence for ppc comparisons.
>
> Tested-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
>  tcg/arm/tcg-target.inc.c | 97 ++++++++++++++++++++++++++++++++----------------
>  1 file changed, 65 insertions(+), 32 deletions(-)
>
> diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
> index 98a12535a5..0ff283d84f 100644
> --- a/tcg/arm/tcg-target.inc.c
> +++ b/tcg/arm/tcg-target.inc.c
> @@ -239,10 +239,10 @@ static void patch_reloc(tcg_insn_unit *code_ptr, int type,
>      }
>  }
>
> -#define TCG_CT_CONST_ARM  0x100
> -#define TCG_CT_CONST_INV  0x200
> -#define TCG_CT_CONST_NEG  0x400
> -#define TCG_CT_CONST_ZERO 0x800
> +#define TCG_CT_CONST_ARM     0x0100
> +#define TCG_CT_CONST_INV     0x0200
> +#define TCG_CT_CONST_NEG     0x0400
> +#define TCG_CT_CONST_ZERO    0x1000

You might as well just drop this hunk now you're not adding a new entry.

>  /* parse target specific constraints */
>  static const char *target_parse_constraint(TCGArgConstraint *ct,
> @@ -351,8 +351,7 @@ static inline int check_fit_imm(uint32_t imm)
>  static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
>                                           const TCGArgConstraint *arg_ct)
>  {
> -    int ct;
> -    ct = arg_ct->ct;
> +    int ct = arg_ct->ct;
>      if (ct & TCG_CT_CONST) {
>          return 1;
>      } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {

Ditto.


Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM


Thread overview: 4+ messages
2018-01-16  3:45 [Qemu-devel] [PATCH v2 0/4] tcg/arm fixes Richard Henderson
2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 1/4] tcg/arm: Fix double-word comparisons Richard Henderson
2018-01-16 11:33   ` Peter Maydell
2018-01-16  3:45 ` [Qemu-devel] [PATCH v2 2/4] tcg/arm: Support tlb offsets larger than 64k Richard Henderson
