* [Qemu-devel] [PATCH v4 01/33] disas: Disassemble all ppc insns for the host
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
@ 2013-04-04 22:55 ` Richard Henderson
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 02/33] tcg-ppc64: Use TCGReg everywhere Richard Henderson
` (32 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:55 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
disas.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/disas.c b/disas.c
index 74d3ba0..67103e0 100644
--- a/disas.c
+++ b/disas.c
@@ -325,6 +325,7 @@ void disas(FILE *out, void *code, unsigned long size)
s.info.mach = bfd_mach_x86_64;
print_insn = print_insn_i386;
#elif defined(_ARCH_PPC)
+ s.info.disassembler_options = (char *)"any";
print_insn = print_insn_ppc;
#elif defined(__alpha__)
print_insn = print_insn_alpha;
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 02/33] tcg-ppc64: Use TCGReg everywhere
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 01/33] disas: Disassemble all ppc insns for the host Richard Henderson
@ 2013-04-04 22:55 ` Richard Henderson
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 03/33] tcg-ppc64: Introduce and use tcg_out_rlw Richard Henderson
` (31 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:55 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 36 +++++++++++++++++++++---------------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 833fe0c..762ca1b 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -431,19 +431,21 @@ static const uint32_t tcg_to_bc[] = {
[TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE,
};
-static void tcg_out_mov (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+static inline void tcg_out_mov(TCGContext *s, TCGType type,
+ TCGReg ret, TCGReg arg)
{
tcg_out32 (s, OR | SAB (arg, ret, arg));
}
-static void tcg_out_rld (TCGContext *s, int op, int ra, int rs, int sh, int mb)
+static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
+ int sh, int mb)
{
sh = SH (sh & 0x1f) | (((sh >> 5) & 1) << 1);
mb = MB64 ((mb >> 5) | ((mb << 1) & 0x3f));
tcg_out32 (s, op | RA (ra) | RS (rs) | sh | mb);
}
-static void tcg_out_movi32 (TCGContext *s, int ret, int32_t arg)
+static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
{
if (arg == (int16_t) arg)
tcg_out32 (s, ADDI | RT (ret) | RA (0) | (arg & 0xffff));
@@ -522,8 +524,8 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
#endif
}
-static void tcg_out_ldst (TCGContext *s, int ret, int addr,
- int offset, int op1, int op2)
+static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
+ int offset, int op1, int op2)
{
if (offset == (int16_t) offset)
tcg_out32 (s, op1 | RT (ret) | RA (addr) | (offset & 0xffff));
@@ -533,8 +535,8 @@ static void tcg_out_ldst (TCGContext *s, int ret, int addr,
}
}
-static void tcg_out_ldsta (TCGContext *s, int ret, int addr,
- int offset, int op1, int op2)
+static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr,
+ int offset, int op1, int op2)
{
if (offset == (int16_t) (offset & ~3))
tcg_out32 (s, op1 | RT (ret) | RA (addr) | (offset & 0xffff));
@@ -566,8 +568,8 @@ static const void * const qemu_st_helpers[4] = {
helper_stq_mmu,
};
-static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
- int addr_reg, int s_bits, int offset)
+static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
+ TCGReg addr_reg, int s_bits, int offset)
{
#if TARGET_LONG_BITS == 32
tcg_out_rld (s, RLDICL, addr_reg, addr_reg, 0, 32);
@@ -616,9 +618,11 @@ static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
{
- int addr_reg, data_reg, r0, r1, rbase, bswap;
+ TCGReg addr_reg, data_reg, r0, r1, rbase;
+ int bswap;
#ifdef CONFIG_SOFTMMU
- int r2, mem_index, s_bits, ir;
+ TCGReg r2, ir;
+ int mem_index, s_bits;
void *label1_ptr, *label2_ptr;
#endif
@@ -766,9 +770,11 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
{
- int addr_reg, r0, r1, rbase, data_reg, bswap;
+ TCGReg addr_reg, r0, r1, rbase, data_reg;
+ int bswap;
#ifdef CONFIG_SOFTMMU
- int r2, mem_index, ir;
+ TCGReg r2, ir;
+ int mem_index;
void *label1_ptr, *label2_ptr;
#endif
@@ -954,7 +960,7 @@ static void tcg_out_st (TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
tcg_out_ldsta (s, arg, arg1, arg2, STD, STDX);
}
-static void ppc_addi32 (TCGContext *s, int rt, int ra, tcg_target_long si)
+static void ppc_addi32(TCGContext *s, TCGReg rt, TCGReg ra, tcg_target_long si)
{
if (!si && rt == ra)
return;
@@ -968,7 +974,7 @@ static void ppc_addi32 (TCGContext *s, int rt, int ra, tcg_target_long si)
}
}
-static void ppc_addi64 (TCGContext *s, int rt, int ra, tcg_target_long si)
+static void ppc_addi64(TCGContext *s, TCGReg rt, TCGReg ra, tcg_target_long si)
{
/* XXX: suboptimal */
if (si == (int16_t) si
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 03/33] tcg-ppc64: Introduce and use tcg_out_rlw
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 01/33] disas: Disassemble all ppc insns for the host Richard Henderson
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 02/33] tcg-ppc64: Use TCGReg everywhere Richard Henderson
@ 2013-04-04 22:55 ` Richard Henderson
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 04/33] tcg-ppc64: Introduce and use tcg_out_ext32u Richard Henderson
` (30 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:55 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 72 ++++++++++++++------------------------------------
1 file changed, 20 insertions(+), 52 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 762ca1b..3587d0e 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -445,6 +445,12 @@ static inline void tcg_out_rld(TCGContext *s, int op, TCGReg ra, TCGReg rs,
tcg_out32 (s, op | RA (ra) | RS (rs) | sh | mb);
}
+static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
+ int sh, int mb, int me)
+{
+ tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
+}
+
static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
{
if (arg == (int16_t) arg)
@@ -574,24 +580,14 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
#if TARGET_LONG_BITS == 32
tcg_out_rld (s, RLDICL, addr_reg, addr_reg, 0, 32);
- tcg_out32 (s, (RLWINM
- | RA (r0)
- | RS (addr_reg)
- | SH (32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
- | MB (32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS))
- | ME (31 - CPU_TLB_ENTRY_BITS)
- )
- );
+ tcg_out_rlw(s, RLWINM, r0, addr_reg,
+ 32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
+ 32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
+ 31 - CPU_TLB_ENTRY_BITS);
tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (TCG_AREG0));
tcg_out32 (s, (LWZU | RT (r1) | RA (r0) | offset));
- tcg_out32 (s, (RLWINM
- | RA (r2)
- | RS (addr_reg)
- | SH (0)
- | MB ((32 - s_bits) & 31)
- | ME (31 - TARGET_PAGE_BITS)
- )
- );
+ tcg_out_rlw(s, RLWINM, r2, addr_reg, 0,
+ (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
#else
tcg_out_rld (s, RLDICL, r0, addr_reg,
64 - TARGET_PAGE_BITS,
@@ -1093,14 +1089,7 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
}
else {
tcg_out32 (s, CNTLZW | RS (arg) | RA (0));
- tcg_out32 (s, (RLWINM
- | RA (arg0)
- | RS (0)
- | SH (27)
- | MB (5)
- | ME (31)
- )
- );
+ tcg_out_rlw(s, RLWINM, arg0, 0, 27, 5, 31);
}
break;
@@ -1161,14 +1150,7 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7, type == TCG_TYPE_I64);
if (crop) tcg_out32 (s, crop);
tcg_out32 (s, MFCR | RT (0));
- tcg_out32 (s, (RLWINM
- | RA (arg0)
- | RS (0)
- | SH (sh)
- | MB (31)
- | ME (31)
- )
- );
+ tcg_out_rlw(s, RLWINM, arg0, 0, sh, 31, 31);
break;
default:
@@ -1407,31 +1389,17 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_shl_i32:
if (const_args[2]) {
- tcg_out32 (s, (RLWINM
- | RA (args[0])
- | RS (args[1])
- | SH (args[2])
- | MB (0)
- | ME (31 - args[2])
- )
- );
- }
- else
+ tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31 - args[2]);
+ } else {
tcg_out32 (s, SLW | SAB (args[1], args[0], args[2]));
+ }
break;
case INDEX_op_shr_i32:
if (const_args[2]) {
- tcg_out32 (s, (RLWINM
- | RA (args[0])
- | RS (args[1])
- | SH (32 - args[2])
- | MB (args[2])
- | ME (31)
- )
- );
- }
- else
+ tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], args[2], 31);
+ } else {
tcg_out32 (s, SRW | SAB (args[1], args[0], args[2]));
+ }
break;
case INDEX_op_sar_i32:
if (const_args[2])
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 04/33] tcg-ppc64: Introduce and use tcg_out_ext32u
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (2 preceding siblings ...)
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 03/33] tcg-ppc64: Introduce and use tcg_out_rlw Richard Henderson
@ 2013-04-04 22:55 ` Richard Henderson
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 05/33] tcg-ppc64: Introduce and use tcg_out_shli64 Richard Henderson
` (29 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:55 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 3587d0e..84e30ad 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -451,6 +451,11 @@ static inline void tcg_out_rlw(TCGContext *s, int op, TCGReg ra, TCGReg rs,
tcg_out32(s, op | RA(ra) | RS(rs) | SH(sh) | MB(mb) | ME(me));
}
+static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
+{
+ tcg_out_rld(s, RLDICL, dst, src, 0, 32);
+}
+
static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
{
if (arg == (int16_t) arg)
@@ -484,7 +489,7 @@ static void tcg_out_movi (TCGContext *s, TCGType type,
else {
tcg_out_movi32 (s, ret, arg32);
if (arg32 < 0)
- tcg_out_rld (s, RLDICL, ret, ret, 0, 32);
+ tcg_out_ext32u(s, ret, ret);
}
}
}
@@ -578,7 +583,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
TCGReg addr_reg, int s_bits, int offset)
{
#if TARGET_LONG_BITS == 32
- tcg_out_rld (s, RLDICL, addr_reg, addr_reg, 0, 32);
+ tcg_out_ext32u(s, addr_reg, addr_reg);
tcg_out_rlw(s, RLWINM, r0, addr_reg,
32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
@@ -691,7 +696,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
#else /* !CONFIG_SOFTMMU */
#if TARGET_LONG_BITS == 32
- tcg_out_rld (s, RLDICL, addr_reg, addr_reg, 0, 32);
+ tcg_out_ext32u(s, addr_reg, addr_reg);
#endif
r0 = addr_reg;
r1 = 3;
@@ -824,7 +829,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
#else /* !CONFIG_SOFTMMU */
#if TARGET_LONG_BITS == 32
- tcg_out_rld (s, RLDICL, addr_reg, addr_reg, 0, 32);
+ tcg_out_ext32u(s, addr_reg, addr_reg);
#endif
r1 = 3;
r0 = addr_reg;
@@ -1531,7 +1536,7 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_ext32u_i64:
- tcg_out_rld (s, RLDICL, args[0], args[1], 0, 32);
+ tcg_out_ext32u(s, args[0], args[1]);
break;
case INDEX_op_setcond_i32:
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 05/33] tcg-ppc64: Introduce and use tcg_out_shli64
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (3 preceding siblings ...)
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 04/33] tcg-ppc64: Introduce and use tcg_out_ext32u Richard Henderson
@ 2013-04-04 22:55 ` Richard Henderson
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 06/33] tcg-ppc64: Introduce and use tcg_out_shri64 Richard Henderson
` (28 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:55 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 84e30ad..9199ac4 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -456,6 +456,11 @@ static inline void tcg_out_ext32u(TCGContext *s, TCGReg dst, TCGReg src)
tcg_out_rld(s, RLDICL, dst, src, 0, 32);
}
+static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
+}
+
static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
{
if (arg == (int16_t) arg)
@@ -482,7 +487,7 @@ static void tcg_out_movi (TCGContext *s, TCGType type,
uint16_t l16 = arg;
tcg_out_movi32 (s, ret, arg >> 32);
- tcg_out_rld (s, RLDICR, ret, ret, 32, 31);
+ tcg_out_shli64(s, ret, ret, 32);
if (h16) tcg_out32 (s, ORIS | RS (ret) | RA (ret) | h16);
if (l16) tcg_out32 (s, ORI | RS (ret) | RA (ret) | l16);
}
@@ -597,9 +602,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
tcg_out_rld (s, RLDICL, r0, addr_reg,
64 - TARGET_PAGE_BITS,
64 - CPU_TLB_BITS);
- tcg_out_rld (s, RLDICR, r0, r0,
- CPU_TLB_ENTRY_BITS,
- 63 - CPU_TLB_ENTRY_BITS);
+ tcg_out_shli64(s, r0, r0, CPU_TLB_ENTRY_BITS);
tcg_out32 (s, ADD | TAB (r0, r0, TCG_AREG0));
tcg_out32 (s, LD_ADDR | RT (r1) | RA (r0) | offset);
@@ -1446,7 +1449,7 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_shl_i64:
if (const_args[2])
- tcg_out_rld (s, RLDICR, args[0], args[1], args[2], 63 - args[2]);
+ tcg_out_shli64(s, args[0], args[1], args[2]);
else
tcg_out32 (s, SLD | SAB (args[1], args[0], args[2]));
break;
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 06/33] tcg-ppc64: Introduce and use tcg_out_shri64
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (4 preceding siblings ...)
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 05/33] tcg-ppc64: Introduce and use tcg_out_shli64 Richard Henderson
@ 2013-04-04 22:55 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 07/33] tcg-ppc64: Introduce and use TAI and SAI Richard Henderson
` (27 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:55 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 9199ac4..82e1da7 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -461,6 +461,11 @@ static inline void tcg_out_shli64(TCGContext *s, TCGReg dst, TCGReg src, int c)
tcg_out_rld(s, RLDICR, dst, src, c, 63 - c);
}
+static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
+{
+ tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
+}
+
static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
{
if (arg == (int16_t) arg)
@@ -864,7 +869,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
if (bswap) {
tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
- tcg_out_rld (s, RLDICL, 0, data_reg, 32, 0);
+ tcg_out_shri64(s, 0, data_reg, 32);
tcg_out32 (s, STWBRX | SAB (0, rbase, r1));
}
else tcg_out32 (s, STDX | SAB (data_reg, rbase, r0));
@@ -1455,7 +1460,7 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_shr_i64:
if (const_args[2])
- tcg_out_rld (s, RLDICL, args[0], args[1], 64 - args[2], args[2]);
+ tcg_out_shri64(s, args[0], args[1], args[2]);
else
tcg_out32 (s, SRD | SAB (args[1], args[0], args[2]));
break;
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 07/33] tcg-ppc64: Introduce and use TAI and SAI
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (5 preceding siblings ...)
2013-04-04 22:55 ` [Qemu-devel] [PATCH v4 06/33] tcg-ppc64: Introduce and use tcg_out_shri64 Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 08/33] tcg-ppc64: Fix setcond_i32 Richard Henderson
` (26 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 143 ++++++++++++++++++++++++-------------------------
1 file changed, 70 insertions(+), 73 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 82e1da7..b12cbec 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -398,8 +398,10 @@ static int tcg_target_const_match (tcg_target_long val,
#define LK 1
-#define TAB(t,a,b) (RT(t) | RA(a) | RB(b))
-#define SAB(s,a,b) (RS(s) | RA(a) | RB(b))
+#define TAB(t, a, b) (RT(t) | RA(a) | RB(b))
+#define SAB(s, a, b) (RS(s) | RA(a) | RB(b))
+#define TAI(s, a, i) (RT(s) | RA(a) | ((i) & 0xffff))
+#define SAI(s, a, i) (RS(s) | RA(a) | ((i) & 0xffff))
#define BF(n) ((n)<<23)
#define BI(n, c) (((c)+((n)*4))<<16)
@@ -468,12 +470,13 @@ static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
{
- if (arg == (int16_t) arg)
- tcg_out32 (s, ADDI | RT (ret) | RA (0) | (arg & 0xffff));
- else {
- tcg_out32 (s, ADDIS | RT (ret) | RA (0) | ((arg >> 16) & 0xffff));
- if (arg & 0xffff)
- tcg_out32 (s, ORI | RS (ret) | RA (ret) | (arg & 0xffff));
+ if (arg == (int16_t) arg) {
+ tcg_out32(s, ADDI | TAI(ret, 0, arg));
+ } else {
+ tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
+ if (arg & 0xffff) {
+ tcg_out32(s, ORI | SAI(ret, ret, arg));
+ }
}
}
@@ -491,12 +494,15 @@ static void tcg_out_movi (TCGContext *s, TCGType type,
uint16_t h16 = arg >> 16;
uint16_t l16 = arg;
- tcg_out_movi32 (s, ret, arg >> 32);
+ tcg_out_movi32(s, ret, arg >> 32);
tcg_out_shli64(s, ret, ret, 32);
- if (h16) tcg_out32 (s, ORIS | RS (ret) | RA (ret) | h16);
- if (l16) tcg_out32 (s, ORI | RS (ret) | RA (ret) | l16);
- }
- else {
+ if (h16) {
+ tcg_out32(s, ORIS | SAI(ret, ret, h16));
+ }
+ if (l16) {
+ tcg_out32(s, ORI | SAI(ret, ret, l16));
+ }
+ } else {
tcg_out_movi32 (s, ret, arg32);
if (arg32 < 0)
tcg_out_ext32u(s, ret, ret);
@@ -548,22 +554,22 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
static void tcg_out_ldst(TCGContext *s, TCGReg ret, TCGReg addr,
int offset, int op1, int op2)
{
- if (offset == (int16_t) offset)
- tcg_out32 (s, op1 | RT (ret) | RA (addr) | (offset & 0xffff));
- else {
- tcg_out_movi (s, TCG_TYPE_I64, 0, offset);
- tcg_out32 (s, op2 | RT (ret) | RA (addr) | RB (0));
+ if (offset == (int16_t) offset) {
+ tcg_out32(s, op1 | TAI(ret, addr, offset));
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, 0, offset);
+ tcg_out32(s, op2 | TAB(ret, addr, 0));
}
}
static void tcg_out_ldsta(TCGContext *s, TCGReg ret, TCGReg addr,
int offset, int op1, int op2)
{
- if (offset == (int16_t) (offset & ~3))
- tcg_out32 (s, op1 | RT (ret) | RA (addr) | (offset & 0xffff));
- else {
- tcg_out_movi (s, TCG_TYPE_I64, 0, offset);
- tcg_out32 (s, op2 | RT (ret) | RA (addr) | RB (0));
+ if (offset == (int16_t) (offset & ~3)) {
+ tcg_out32(s, op1 | TAI(ret, addr, offset));
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, 0, offset);
+ tcg_out32(s, op2 | TAB(ret, addr, 0));
}
}
@@ -599,8 +605,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
31 - CPU_TLB_ENTRY_BITS);
- tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (TCG_AREG0));
- tcg_out32 (s, (LWZU | RT (r1) | RA (r0) | offset));
+ tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0));
+ tcg_out32(s, LWZU | TAI(r1, r0, offset));
tcg_out_rlw(s, RLWINM, r2, addr_reg, 0,
(32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
#else
@@ -609,8 +615,8 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
64 - CPU_TLB_BITS);
tcg_out_shli64(s, r0, r0, CPU_TLB_ENTRY_BITS);
- tcg_out32 (s, ADD | TAB (r0, r0, TCG_AREG0));
- tcg_out32 (s, LD_ADDR | RT (r1) | RA (r0) | offset);
+ tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0));
+ tcg_out32(s, LD_ADDR | TAI(r1, r0, offset));
if (!s_bits) {
tcg_out_rld (s, RLDICR, r2, addr_reg, 0, 63 - TARGET_PAGE_BITS);
@@ -692,14 +698,11 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
#endif
/* r0 now contains &env->tlb_table[mem_index][index].addr_read */
- tcg_out32 (s, (LD
- | RT (r0)
- | RA (r0)
- | (offsetof (CPUTLBEntry, addend)
- - offsetof (CPUTLBEntry, addr_read))
- ));
+ tcg_out32(s, LD | TAI(r0, r0,
+ offsetof(CPUTLBEntry, addend)
+ - offsetof(CPUTLBEntry, addr_read)));
/* r0 = env->tlb_table[mem_index][index].addend */
- tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg));
+ tcg_out32(s, ADD | TAB(r0, r0, addr_reg));
/* r0 = env->tlb_table[mem_index][index].addend + addr */
#else /* !CONFIG_SOFTMMU */
@@ -754,7 +757,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
case 3:
#ifdef CONFIG_USE_GUEST_BASE
if (bswap) {
- tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
+ tcg_out32(s, ADDI | TAI(r1, r0, 4));
tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0));
tcg_out32 (s, LWBRX | TAB ( r1, rbase, r1));
tcg_out_rld (s, RLDIMI, data_reg, r1, 32, 0);
@@ -832,7 +835,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
- offsetof (CPUTLBEntry, addr_write))
));
/* r0 = env->tlb_table[mem_index][index].addend */
- tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg));
+ tcg_out32(s, ADD | TAB(r0, r0, addr_reg));
/* r0 = env->tlb_table[mem_index][index].addend + addr */
#else /* !CONFIG_SOFTMMU */
@@ -868,7 +871,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
case 3:
if (bswap) {
tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
- tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
+ tcg_out32(s, ADDI | TAI(r1, r0, 4));
tcg_out_shri64(s, 0, data_reg, 32);
tcg_out32 (s, STWBRX | SAB (0, rbase, r1));
}
@@ -945,10 +948,10 @@ static void tcg_target_qemu_prologue (TCGContext *s)
| (i * 8 + 48 + TCG_STATIC_CALL_ARGS_SIZE)
)
);
- tcg_out32 (s, LD | RT (0) | RA (1) | (frame_size + 16));
- tcg_out32 (s, MTSPR | RS (0) | LR);
- tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size);
- tcg_out32 (s, BCLR | BO_ALWAYS);
+ tcg_out32(s, LD | TAI(0, 1, frame_size + 16));
+ tcg_out32(s, MTSPR | RS(0) | LR);
+ tcg_out32(s, ADDI | TAI(1, 1, frame_size));
+ tcg_out32(s, BCLR | BO_ALWAYS);
}
static void tcg_out_ld (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
@@ -975,11 +978,11 @@ static void ppc_addi32(TCGContext *s, TCGReg rt, TCGReg ra, tcg_target_long si)
return;
if (si == (int16_t) si)
- tcg_out32 (s, ADDI | RT (rt) | RA (ra) | (si & 0xffff));
+ tcg_out32(s, ADDI | TAI(rt, ra, si));
else {
uint16_t h = ((si >> 16) & 0xffff) + ((uint16_t) si >> 15);
- tcg_out32 (s, ADDIS | RT (rt) | RA (ra) | h);
- tcg_out32 (s, ADDI | RT (rt) | RA (rt) | (si & 0xffff));
+ tcg_out32(s, ADDIS | TAI(rt, ra, h));
+ tcg_out32(s, ADDI | TAI(rt, rt, si));
}
}
@@ -991,7 +994,7 @@ static void ppc_addi64(TCGContext *s, TCGReg rt, TCGReg ra, tcg_target_long si)
ppc_addi32 (s, rt, ra, si);
else {
tcg_out_movi (s, TCG_TYPE_I64, 0, si);
- tcg_out32 (s, ADD | RT (rt) | RA (ra));
+ tcg_out32(s, ADD | TAB(rt, ra, 0));
}
}
@@ -1083,7 +1086,7 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
else {
arg = 0;
if ((uint16_t) arg2 == arg2) {
- tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2);
+ tcg_out32(s, XORI | SAI(arg1, 0, arg2));
}
else {
tcg_out_movi (s, type, 0, arg2);
@@ -1114,9 +1117,8 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
else {
arg = 0;
if ((uint16_t) arg2 == arg2) {
- tcg_out32 (s, XORI | RS (arg1) | RA (0) | arg2);
- }
- else {
+ tcg_out32(s, XORI | SAI(arg1, 0, arg2));
+ } else {
tcg_out_movi (s, type, 0, arg2);
tcg_out32 (s, XOR | SAB (arg1, 0, 0));
}
@@ -1128,12 +1130,12 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
}
if (arg == arg1 && arg1 == arg0) {
- tcg_out32 (s, ADDIC | RT (0) | RA (arg) | 0xffff);
- tcg_out32 (s, SUBFE | TAB (arg0, 0, arg));
+ tcg_out32(s, ADDIC | TAI(0, arg, -1));
+ tcg_out32(s, SUBFE | TAB(arg0, 0, arg));
}
else {
- tcg_out32 (s, ADDIC | RT (arg0) | RA (arg) | 0xffff);
- tcg_out32 (s, SUBFE | TAB (arg0, arg0, arg));
+ tcg_out32(s, ADDIC | TAI(arg0, arg, -1));
+ tcg_out32(s, SUBFE | TAB(arg0, arg0, arg));
}
break;
@@ -1311,12 +1313,11 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_and_i64:
case INDEX_op_and_i32:
if (const_args[2]) {
- if ((args[2] & 0xffff) == args[2])
- tcg_out32 (s, ANDI | RS (args[1]) | RA (args[0]) | args[2]);
- else if ((args[2] & 0xffff0000) == args[2])
- tcg_out32 (s, ANDIS | RS (args[1]) | RA (args[0])
- | ((args[2] >> 16) & 0xffff));
- else {
+ if ((args[2] & 0xffff) == args[2]) {
+ tcg_out32(s, ANDI | SAI(args[1], args[0], args[2]));
+ } else if ((args[2] & 0xffff0000) == args[2]) {
+ tcg_out32(s, ANDIS | SAI(args[1], args[0], args[2] >> 16));
+ } else {
tcg_out_movi (s, (opc == INDEX_op_and_i32
? TCG_TYPE_I32
: TCG_TYPE_I64),
@@ -1331,15 +1332,13 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_or_i32:
if (const_args[2]) {
if (args[2] & 0xffff) {
- tcg_out32 (s, ORI | RS (args[1]) | RA (args[0])
- | (args[2] & 0xffff));
- if (args[2] >> 16)
- tcg_out32 (s, ORIS | RS (args[0]) | RA (args[0])
- | ((args[2] >> 16) & 0xffff));
+ tcg_out32(s, ORI | SAI(args[1], args[0], args[2]));
+ if (args[2] >> 16) {
+ tcg_out32(s, ORIS | SAI(args[0], args[0], args[2] >> 16));
+ }
}
else {
- tcg_out32 (s, ORIS | RS (args[1]) | RA (args[0])
- | ((args[2] >> 16) & 0xffff));
+ tcg_out32(s, ORIS | SAI(args[1], args[0], args[2] >> 16));
}
}
else
@@ -1348,13 +1347,11 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_xor_i64:
case INDEX_op_xor_i32:
if (const_args[2]) {
- if ((args[2] & 0xffff) == args[2])
- tcg_out32 (s, XORI | RS (args[1]) | RA (args[0])
- | (args[2] & 0xffff));
- else if ((args[2] & 0xffff0000) == args[2])
- tcg_out32 (s, XORIS | RS (args[1]) | RA (args[0])
- | ((args[2] >> 16) & 0xffff));
- else {
+ if ((args[2] & 0xffff) == args[2]) {
+ tcg_out32(s, XORI | SAI(args[1], args[0], args[2]));
+ } else if ((args[2] & 0xffff0000) == args[2]) {
+ tcg_out32(s, XORIS | SAI(args[1], args[0], args[2] >> 16));
+ } else {
tcg_out_movi (s, (opc == INDEX_op_and_i32
? TCG_TYPE_I32
: TCG_TYPE_I64),
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 08/33] tcg-ppc64: Fix setcond_i32
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (6 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 07/33] tcg-ppc64: Introduce and use TAI and SAI Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-15 7:54 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 09/33] tcg-ppc64: Cleanup tcg_out_movi Richard Henderson
` (25 subsequent siblings)
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
We weren't ignoring the high 32 bits during a NE comparison.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index b12cbec..822eb07 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1129,6 +1129,12 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
}
+ /* Make sure and discard the high 32-bits of the input. */
+ if (type == TCG_TYPE_I32) {
+ tcg_out32(s, EXTSW | RA(TCG_REG_R0) | RS(arg));
+ arg = TCG_REG_R0;
+ }
+
if (arg == arg1 && arg1 == arg0) {
tcg_out32(s, ADDIC | TAI(0, arg, -1));
tcg_out32(s, SUBFE | TAB(arg0, 0, arg));
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 08/33] tcg-ppc64: Fix setcond_i32
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 08/33] tcg-ppc64: Fix setcond_i32 Richard Henderson
@ 2013-04-15 7:54 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-15 7:54 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:01PM -0500, Richard Henderson wrote:
> We weren't ignoring the high 32 bits during a NE comparison.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index b12cbec..822eb07 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -1129,6 +1129,12 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
> tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
> }
>
> + /* Make sure and discard the high 32-bits of the input. */
> + if (type == TCG_TYPE_I32) {
> + tcg_out32(s, EXTSW | RA(TCG_REG_R0) | RS(arg));
> + arg = TCG_REG_R0;
> + }
> +
> if (arg == arg1 && arg1 == arg0) {
> tcg_out32(s, ADDIC | TAI(0, arg, -1));
> tcg_out32(s, SUBFE | TAB(arg0, 0, arg));
Given that it adds one more instruction, I do wonder if we still need a
different implementation than the EQ one and XORI 1. The latter is what
GCC chooses.
That said, the fix is correct, so:
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 09/33] tcg-ppc64: Cleanup tcg_out_movi
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (7 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 08/33] tcg-ppc64: Fix setcond_i32 Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 10/33] tcg-ppc64: Rearrange integer constant constraints Richard Henderson
` (24 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
The test for using movi32 was sub-optimal for TCG_TYPE_I32, comparing
a signed 32-bit quantity against an unsigned 32-bit quantity.
When possible, use addi+oris for 32-bit unsigned constants. Otherwise,
standardize on addi+oris+ori instead of addis+ori+rldicl.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 41 +++++++++++++++++------------------------
1 file changed, 17 insertions(+), 24 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 822eb07..c6ff75b 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -480,32 +480,25 @@ static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
}
}
-static void tcg_out_movi (TCGContext *s, TCGType type,
- TCGReg ret, tcg_target_long arg)
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
+ tcg_target_long arg)
{
- int32_t arg32 = arg;
- arg = type == TCG_TYPE_I32 ? arg & 0xffffffff : arg;
-
- if (arg == arg32) {
- tcg_out_movi32 (s, ret, arg32);
- }
- else {
- if ((uint64_t) arg >> 32) {
- uint16_t h16 = arg >> 16;
- uint16_t l16 = arg;
-
- tcg_out_movi32(s, ret, arg >> 32);
+ if (type == TCG_TYPE_I32 || arg == (int32_t)arg) {
+ tcg_out_movi32(s, ret, arg);
+ } else if (arg == (uint32_t)arg && !(arg & 0x8000)) {
+ tcg_out32(s, ADDI | TAI(ret, 0, arg));
+ tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
+ } else {
+ int32_t high = arg >> 32;
+ tcg_out_movi32(s, ret, high);
+ if (high) {
tcg_out_shli64(s, ret, ret, 32);
- if (h16) {
- tcg_out32(s, ORIS | SAI(ret, ret, h16));
- }
- if (l16) {
- tcg_out32(s, ORI | SAI(ret, ret, l16));
- }
- } else {
- tcg_out_movi32 (s, ret, arg32);
- if (arg32 < 0)
- tcg_out_ext32u(s, ret, ret);
+ }
+ if (arg & 0xffff0000) {
+ tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
+ }
+ if (arg & 0xffff) {
+ tcg_out32(s, ORI | SAI(ret, ret, arg));
}
}
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 10/33] tcg-ppc64: Rearrange integer constant constraints
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (8 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 09/33] tcg-ppc64: Cleanup tcg_out_movi Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 11/33] tcg-ppc64: Improve constant add and sub ops Richard Henderson
` (23 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
We'll need a zero, and Z makes more sense for that. Make sure we
have a full complement of signed and unsigned 16 and 32-bit tests.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 43 +++++++++++++++++++++++++++++++++----------
1 file changed, 33 insertions(+), 10 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index c6ff75b..6ba09ab 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -22,7 +22,11 @@
* THE SOFTWARE.
*/
-#define TCG_CT_CONST_U32 0x100
+#define TCG_CT_CONST_S16 0x100
+#define TCG_CT_CONST_U16 0x200
+#define TCG_CT_CONST_S32 0x400
+#define TCG_CT_CONST_U32 0x800
+#define TCG_CT_CONST_ZERO 0x1000
static uint8_t *tb_ret_addr;
@@ -242,9 +246,21 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R6);
#endif
break;
- case 'Z':
+ case 'I':
+ ct->ct |= TCG_CT_CONST_S16;
+ break;
+ case 'J':
+ ct->ct |= TCG_CT_CONST_U16;
+ break;
+ case 'T':
+ ct->ct |= TCG_CT_CONST_S32;
+ break;
+ case 'U':
ct->ct |= TCG_CT_CONST_U32;
break;
+ case 'Z':
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
default:
return -1;
}
@@ -257,13 +273,20 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
static int tcg_target_const_match (tcg_target_long val,
const TCGArgConstraint *arg_ct)
{
- int ct;
-
- ct = arg_ct->ct;
- if (ct & TCG_CT_CONST)
+ int ct = arg_ct->ct;
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_U16) && val == (uint16_t)val) {
return 1;
- else if ((ct & TCG_CT_CONST_U32) && (val == (uint32_t) val))
+ } else if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
return 1;
+ } else if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ }
return 0;
}
@@ -1613,9 +1636,9 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_add_i64, { "r", "r", "ri" } },
{ INDEX_op_sub_i64, { "r", "r", "ri" } },
- { INDEX_op_and_i64, { "r", "r", "rZ" } },
- { INDEX_op_or_i64, { "r", "r", "rZ" } },
- { INDEX_op_xor_i64, { "r", "r", "rZ" } },
+ { INDEX_op_and_i64, { "r", "r", "rU" } },
+ { INDEX_op_or_i64, { "r", "r", "rU" } },
+ { INDEX_op_xor_i64, { "r", "r", "rU" } },
{ INDEX_op_shl_i64, { "r", "r", "ri" } },
{ INDEX_op_shr_i64, { "r", "r", "ri" } },
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 11/33] tcg-ppc64: Improve constant add and sub ops.
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (9 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 10/33] tcg-ppc64: Rearrange integer constant constraints Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-15 7:54 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 12/33] tcg-ppc64: Allow constant first argument to sub Richard Henderson
` (22 subsequent siblings)
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Improve constant addition -- previously we'd emit useless addi with 0.
Use new constraints to force the driver to pull full 64-bit constants
into a register.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 108 +++++++++++++++++++++++++++++--------------------
1 file changed, 64 insertions(+), 44 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 6ba09ab..384946b 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -988,32 +988,6 @@ static void tcg_out_st (TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
tcg_out_ldsta (s, arg, arg1, arg2, STD, STDX);
}
-static void ppc_addi32(TCGContext *s, TCGReg rt, TCGReg ra, tcg_target_long si)
-{
- if (!si && rt == ra)
- return;
-
- if (si == (int16_t) si)
- tcg_out32(s, ADDI | TAI(rt, ra, si));
- else {
- uint16_t h = ((si >> 16) & 0xffff) + ((uint16_t) si >> 15);
- tcg_out32(s, ADDIS | TAI(rt, ra, h));
- tcg_out32(s, ADDI | TAI(rt, rt, si));
- }
-}
-
-static void ppc_addi64(TCGContext *s, TCGReg rt, TCGReg ra, tcg_target_long si)
-{
- /* XXX: suboptimal */
- if (si == (int16_t) si
- || ((((uint64_t) si >> 31) == 0) && (si & 0x8000) == 0))
- ppc_addi32 (s, rt, ra, si);
- else {
- tcg_out_movi (s, TCG_TYPE_I64, 0, si);
- tcg_out32(s, ADD | TAB(rt, ra, 0));
- }
-}
-
static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
int const_arg2, int cr, int arch64)
{
@@ -1232,6 +1206,7 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr)
static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
const int *const_args)
{
+ TCGArg a0, a1, a2;
int c;
switch (opc) {
@@ -1320,16 +1295,31 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_add_i32:
- if (const_args[2])
- ppc_addi32 (s, args[0], args[1], args[2]);
- else
- tcg_out32 (s, ADD | TAB (args[0], args[1], args[2]));
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ int32_t l, h;
+ do_addi_32:
+ l = (int16_t)a2;
+ h = a2 - l;
+ if (h) {
+ tcg_out32(s, ADDIS | TAI(a0, a1, h >> 16));
+ a1 = a0;
+ }
+ if (l || a0 != a1) {
+ tcg_out32(s, ADDI | TAI(a0, a1, l));
+ }
+ } else {
+ tcg_out32(s, ADD | TAB(a0, a1, a2));
+ }
break;
case INDEX_op_sub_i32:
- if (const_args[2])
- ppc_addi32 (s, args[0], args[1], -args[2]);
- else
- tcg_out32 (s, SUBF | TAB (args[0], args[2], args[1]));
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ a2 = -a2;
+ goto do_addi_32;
+ } else {
+ tcg_out32(s, SUBF | TAB(a0, a2, a1));
+ }
break;
case INDEX_op_and_i64:
@@ -1459,16 +1449,46 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_add_i64:
- if (const_args[2])
- ppc_addi64 (s, args[0], args[1], args[2]);
- else
- tcg_out32 (s, ADD | TAB (args[0], args[1], args[2]));
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ int32_t l0, h1, h2;
+ do_addi_64:
+ /* We can always split any 32-bit signed constant into 3 pieces.
+ Note the positive 0x80000000 coming from the sub_i64 path,
+ handled with the same code we need for eg 0x7fff8000. */
+ assert(a2 == (int32_t)a2 || a2 == 0x80000000);
+ l0 = (int16_t)a2;
+ h1 = a2 - l0;
+ h2 = 0;
+ if (h1 < 0 && (int64_t)a2 > 0) {
+ h2 = 0x40000000;
+ h1 = a2 - h2 - l0;
+ }
+ assert((TCGArg)h2 + h1 + l0 == a2);
+
+ if (h2) {
+ tcg_out32(s, ADDIS | TAI(a0, a1, h2 >> 16));
+ a1 = a0;
+ }
+ if (h1) {
+ tcg_out32(s, ADDIS | TAI(a0, a1, h1 >> 16));
+ a1 = a0;
+ }
+ if (l0 || a0 != a1) {
+ tcg_out32(s, ADDI | TAI(a0, a1, l0));
+ }
+ } else {
+ tcg_out32(s, ADD | TAB(a0, a1, a2));
+ }
break;
case INDEX_op_sub_i64:
- if (const_args[2])
- ppc_addi64 (s, args[0], args[1], -args[2]);
- else
- tcg_out32 (s, SUBF | TAB (args[0], args[2], args[1]));
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ a2 = -a2;
+ goto do_addi_64;
+ } else {
+ tcg_out32(s, SUBF | TAB(a0, a2, a1));
+ }
break;
case INDEX_op_shl_i64:
@@ -1634,8 +1654,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_neg_i32, { "r", "r" } },
{ INDEX_op_not_i32, { "r", "r" } },
- { INDEX_op_add_i64, { "r", "r", "ri" } },
- { INDEX_op_sub_i64, { "r", "r", "ri" } },
+ { INDEX_op_add_i64, { "r", "r", "rT" } },
+ { INDEX_op_sub_i64, { "r", "r", "rT" } },
{ INDEX_op_and_i64, { "r", "r", "rU" } },
{ INDEX_op_or_i64, { "r", "r", "rU" } },
{ INDEX_op_xor_i64, { "r", "r", "rU" } },
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 11/33] tcg-ppc64: Improve constant add and sub ops.
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 11/33] tcg-ppc64: Improve constant add and sub ops Richard Henderson
@ 2013-04-15 7:54 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-15 7:54 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:04PM -0500, Richard Henderson wrote:
> Improve constant addition -- previously we'd emit useless addi with 0.
> Use new constraints to force the driver to pull full 64-bit constants
> into a register.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 108 +++++++++++++++++++++++++++++--------------------
> 1 file changed, 64 insertions(+), 44 deletions(-)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index 6ba09ab..384946b 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -988,32 +988,6 @@ static void tcg_out_st (TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
> tcg_out_ldsta (s, arg, arg1, arg2, STD, STDX);
> }
>
> -static void ppc_addi32(TCGContext *s, TCGReg rt, TCGReg ra, tcg_target_long si)
> -{
> - if (!si && rt == ra)
> - return;
> -
> - if (si == (int16_t) si)
> - tcg_out32(s, ADDI | TAI(rt, ra, si));
> - else {
> - uint16_t h = ((si >> 16) & 0xffff) + ((uint16_t) si >> 15);
> - tcg_out32(s, ADDIS | TAI(rt, ra, h));
> - tcg_out32(s, ADDI | TAI(rt, rt, si));
> - }
> -}
> -
> -static void ppc_addi64(TCGContext *s, TCGReg rt, TCGReg ra, tcg_target_long si)
> -{
> - /* XXX: suboptimal */
> - if (si == (int16_t) si
> - || ((((uint64_t) si >> 31) == 0) && (si & 0x8000) == 0))
> - ppc_addi32 (s, rt, ra, si);
> - else {
> - tcg_out_movi (s, TCG_TYPE_I64, 0, si);
> - tcg_out32(s, ADD | TAB(rt, ra, 0));
> - }
> -}
> -
> static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
> int const_arg2, int cr, int arch64)
> {
> @@ -1232,6 +1206,7 @@ void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr)
> static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
> const int *const_args)
> {
> + TCGArg a0, a1, a2;
> int c;
>
> switch (opc) {
> @@ -1320,16 +1295,31 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
> break;
>
> case INDEX_op_add_i32:
> - if (const_args[2])
> - ppc_addi32 (s, args[0], args[1], args[2]);
> - else
> - tcg_out32 (s, ADD | TAB (args[0], args[1], args[2]));
> + a0 = args[0], a1 = args[1], a2 = args[2];
> + if (const_args[2]) {
> + int32_t l, h;
> + do_addi_32:
> + l = (int16_t)a2;
> + h = a2 - l;
> + if (h) {
> + tcg_out32(s, ADDIS | TAI(a0, a1, h >> 16));
> + a1 = a0;
> + }
> + if (l || a0 != a1) {
> + tcg_out32(s, ADDI | TAI(a0, a1, l));
> + }
> + } else {
> + tcg_out32(s, ADD | TAB(a0, a1, a2));
> + }
> break;
> case INDEX_op_sub_i32:
> - if (const_args[2])
> - ppc_addi32 (s, args[0], args[1], -args[2]);
> - else
> - tcg_out32 (s, SUBF | TAB (args[0], args[2], args[1]));
> + a0 = args[0], a1 = args[1], a2 = args[2];
> + if (const_args[2]) {
> + a2 = -a2;
> + goto do_addi_32;
> + } else {
> + tcg_out32(s, SUBF | TAB(a0, a2, a1));
> + }
> break;
>
> case INDEX_op_and_i64:
> @@ -1459,16 +1449,46 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
> break;
>
> case INDEX_op_add_i64:
> - if (const_args[2])
> - ppc_addi64 (s, args[0], args[1], args[2]);
> - else
> - tcg_out32 (s, ADD | TAB (args[0], args[1], args[2]));
> + a0 = args[0], a1 = args[1], a2 = args[2];
> + if (const_args[2]) {
> + int32_t l0, h1, h2;
> + do_addi_64:
> + /* We can always split any 32-bit signed constant into 3 pieces.
> + Note the positive 0x80000000 coming from the sub_i64 path,
> + handled with the same code we need for eg 0x7fff8000. */
> + assert(a2 == (int32_t)a2 || a2 == 0x80000000);
> + l0 = (int16_t)a2;
> + h1 = a2 - l0;
> + h2 = 0;
> + if (h1 < 0 && (int64_t)a2 > 0) {
> + h2 = 0x40000000;
> + h1 = a2 - h2 - l0;
> + }
> + assert((TCGArg)h2 + h1 + l0 == a2);
> +
> + if (h2) {
> + tcg_out32(s, ADDIS | TAI(a0, a1, h2 >> 16));
> + a1 = a0;
> + }
> + if (h1) {
> + tcg_out32(s, ADDIS | TAI(a0, a1, h1 >> 16));
> + a1 = a0;
> + }
> + if (l0 || a0 != a1) {
> + tcg_out32(s, ADDI | TAI(a0, a1, l0));
> + }
> + } else {
> + tcg_out32(s, ADD | TAB(a0, a1, a2));
> + }
> break;
> case INDEX_op_sub_i64:
> - if (const_args[2])
> - ppc_addi64 (s, args[0], args[1], -args[2]);
> - else
> - tcg_out32 (s, SUBF | TAB (args[0], args[2], args[1]));
> + a0 = args[0], a1 = args[1], a2 = args[2];
> + if (const_args[2]) {
> + a2 = -a2;
> + goto do_addi_64;
> + } else {
> + tcg_out32(s, SUBF | TAB(a0, a2, a1));
> + }
> break;
>
> case INDEX_op_shl_i64:
> @@ -1634,8 +1654,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
> { INDEX_op_neg_i32, { "r", "r" } },
> { INDEX_op_not_i32, { "r", "r" } },
>
> - { INDEX_op_add_i64, { "r", "r", "ri" } },
> - { INDEX_op_sub_i64, { "r", "r", "ri" } },
> + { INDEX_op_add_i64, { "r", "r", "rT" } },
> + { INDEX_op_sub_i64, { "r", "r", "rT" } },
> { INDEX_op_and_i64, { "r", "r", "rU" } },
> { INDEX_op_or_i64, { "r", "r", "rU" } },
> { INDEX_op_xor_i64, { "r", "r", "rU" } },
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 12/33] tcg-ppc64: Allow constant first argument to sub
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (10 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 11/33] tcg-ppc64: Improve constant add and sub ops Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-15 7:59 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 13/33] tcg-ppc64: Tidy or and xor patterns Richard Henderson
` (21 subsequent siblings)
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Using SUBFIC for 16-bit signed constants.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 384946b..4da969f 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -329,6 +329,7 @@ static int tcg_target_const_match (tcg_target_long val,
#define MULLI OPCD( 7)
#define CMPLI OPCD( 10)
#define CMPI OPCD( 11)
+#define SUBFIC OPCD( 8)
#define LWZU OPCD( 33)
#define STWU OPCD( 37)
@@ -1314,7 +1315,13 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_sub_i32:
a0 = args[0], a1 = args[1], a2 = args[2];
- if (const_args[2]) {
+ if (const_args[1]) {
+ if (const_args[2]) {
+ tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
+ }
+ } else if (const_args[2]) {
a2 = -a2;
goto do_addi_32;
} else {
@@ -1483,7 +1490,13 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_sub_i64:
a0 = args[0], a1 = args[1], a2 = args[2];
- if (const_args[2]) {
+ if (const_args[1]) {
+ if (const_args[2]) {
+ tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
+ }
+ } else if (const_args[2]) {
a2 = -a2;
goto do_addi_64;
} else {
@@ -1639,7 +1652,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_divu_i32, { "r", "r", "r" } },
{ INDEX_op_rem_i32, { "r", "r", "r" } },
{ INDEX_op_remu_i32, { "r", "r", "r" } },
- { INDEX_op_sub_i32, { "r", "r", "ri" } },
+ { INDEX_op_sub_i32, { "r", "rI", "ri" } },
{ INDEX_op_and_i32, { "r", "r", "ri" } },
{ INDEX_op_or_i32, { "r", "r", "ri" } },
{ INDEX_op_xor_i32, { "r", "r", "ri" } },
@@ -1655,7 +1668,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_not_i32, { "r", "r" } },
{ INDEX_op_add_i64, { "r", "r", "rT" } },
- { INDEX_op_sub_i64, { "r", "r", "rT" } },
+ { INDEX_op_sub_i64, { "r", "rI", "rT" } },
{ INDEX_op_and_i64, { "r", "r", "rU" } },
{ INDEX_op_or_i64, { "r", "r", "rU" } },
{ INDEX_op_xor_i64, { "r", "r", "rU" } },
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 12/33] tcg-ppc64: Allow constant first argument to sub
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 12/33] tcg-ppc64: Allow constant first argument to sub Richard Henderson
@ 2013-04-15 7:59 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-15 7:59 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:05PM -0500, Richard Henderson wrote:
> Using SUBFIC for 16-bit signed constants.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 21 +++++++++++++++++----
> 1 file changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index 384946b..4da969f 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -329,6 +329,7 @@ static int tcg_target_const_match (tcg_target_long val,
> #define MULLI OPCD( 7)
> #define CMPLI OPCD( 10)
> #define CMPI OPCD( 11)
> +#define SUBFIC OPCD( 8)
>
> #define LWZU OPCD( 33)
> #define STWU OPCD( 37)
> @@ -1314,7 +1315,13 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
> break;
> case INDEX_op_sub_i32:
> a0 = args[0], a1 = args[1], a2 = args[2];
> - if (const_args[2]) {
> + if (const_args[1]) {
> + if (const_args[2]) {
> + tcg_out_movi(s, TCG_TYPE_I32, a0, a1 - a2);
> + } else {
> + tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
> + }
> + } else if (const_args[2]) {
> a2 = -a2;
> goto do_addi_32;
> } else {
> @@ -1483,7 +1490,13 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
> break;
> case INDEX_op_sub_i64:
> a0 = args[0], a1 = args[1], a2 = args[2];
> - if (const_args[2]) {
> + if (const_args[1]) {
> + if (const_args[2]) {
> + tcg_out_movi(s, TCG_TYPE_I64, a0, a1 - a2);
> + } else {
> + tcg_out32(s, SUBFIC | TAI(a0, a2, a1));
> + }
> + } else if (const_args[2]) {
> a2 = -a2;
> goto do_addi_64;
> } else {
> @@ -1639,7 +1652,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
> { INDEX_op_divu_i32, { "r", "r", "r" } },
> { INDEX_op_rem_i32, { "r", "r", "r" } },
> { INDEX_op_remu_i32, { "r", "r", "r" } },
> - { INDEX_op_sub_i32, { "r", "r", "ri" } },
> + { INDEX_op_sub_i32, { "r", "rI", "ri" } },
> { INDEX_op_and_i32, { "r", "r", "ri" } },
> { INDEX_op_or_i32, { "r", "r", "ri" } },
> { INDEX_op_xor_i32, { "r", "r", "ri" } },
> @@ -1655,7 +1668,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
> { INDEX_op_not_i32, { "r", "r" } },
>
> { INDEX_op_add_i64, { "r", "r", "rT" } },
> - { INDEX_op_sub_i64, { "r", "r", "rT" } },
> + { INDEX_op_sub_i64, { "r", "rI", "rT" } },
> { INDEX_op_and_i64, { "r", "r", "rU" } },
> { INDEX_op_or_i64, { "r", "r", "rU" } },
> { INDEX_op_xor_i64, { "r", "r", "rU" } },
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 13/33] tcg-ppc64: Tidy or and xor patterns.
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (11 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 12/33] tcg-ppc64: Allow constant first argument to sub Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 14/33] tcg-ppc64: Improve and_i32 with constant Richard Henderson
` (20 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Handle constants in common code; we'll want to reuse that later.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 55 ++++++++++++++++++++++++++++----------------------
1 file changed, 31 insertions(+), 24 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 4da969f..c8cae72 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -527,6 +527,29 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
}
}
+static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
+ int op_lo, int op_hi)
+{
+ if (c >> 16) {
+ tcg_out32(s, op_hi | SAI(src, dst, c >> 16));
+ src = dst;
+ }
+ if (c & 0xffff) {
+ tcg_out32(s, op_lo | SAI(src, dst, c));
+ src = dst;
+ }
+}
+
+static void tcg_out_ori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
+{
+ tcg_out_zori32(s, dst, src, c, ORI, ORIS);
+}
+
+static void tcg_out_xori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
+{
+ tcg_out_zori32(s, dst, src, c, XORI, XORIS);
+}
+
static void tcg_out_b (TCGContext *s, int mask, tcg_target_long target)
{
tcg_target_long disp;
@@ -1349,37 +1372,21 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_or_i64:
case INDEX_op_or_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- if (args[2] & 0xffff) {
- tcg_out32(s, ORI | SAI(args[1], args[0], args[2]));
- if (args[2] >> 16) {
- tcg_out32(s, ORIS | SAI(args[0], args[0], args[2] >> 16));
- }
- }
- else {
- tcg_out32(s, ORIS | SAI(args[1], args[0], args[2] >> 16));
- }
+ tcg_out_ori32(s, a0, a1, a2);
+ } else {
+ tcg_out32(s, OR | SAB(a1, a0, a2));
}
- else
- tcg_out32 (s, OR | SAB (args[1], args[0], args[2]));
break;
case INDEX_op_xor_i64:
case INDEX_op_xor_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- if ((args[2] & 0xffff) == args[2]) {
- tcg_out32(s, XORI | SAI(args[1], args[0], args[2]));
- } else if ((args[2] & 0xffff0000) == args[2]) {
- tcg_out32(s, XORIS | SAI(args[1], args[0], args[2] >> 16));
- } else {
- tcg_out_movi (s, (opc == INDEX_op_and_i32
- ? TCG_TYPE_I32
- : TCG_TYPE_I64),
- 0, args[2]);
- tcg_out32 (s, XOR | SAB (args[1], args[0], 0));
- }
+ tcg_out_xori32(s, a0, a1, a2);
+ } else {
+ tcg_out32(s, XOR | SAB(a1, a0, a2));
}
- else
- tcg_out32 (s, XOR | SAB (args[1], args[0], args[2]));
break;
case INDEX_op_mul_i32:
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 14/33] tcg-ppc64: Improve and_i32 with constant
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (12 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 13/33] tcg-ppc64: Tidy or and xor patterns Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 15/33] tcg-ppc64: Improve and_i64 " Richard Henderson
` (19 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Use RLWINM
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
tcg/ppc64/tcg-target.h | 6 ++++--
2 files changed, 53 insertions(+), 3 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index c8cae72..1b0563f 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -527,6 +527,48 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
}
}
+static inline bool mask_operand(uint32_t c, int *mb, int *me)
+{
+ uint32_t lsb, test;
+
+ /* Accept a bit pattern like:
+ 0....01....1
+ 1....10....0
+ 0..01..10..0
+ Keep track of the transitions. */
+ if (c == 0 || c == -1) {
+ return false;
+ }
+ test = c;
+ lsb = test & -test;
+ test += lsb;
+ if (test & (test - 1)) {
+ return false;
+ }
+
+ *me = clz32(lsb);
+ *mb = test ? clz32(test & -test) + 1 : 0;
+ return true;
+}
+
+static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
+{
+ int mb, me;
+
+ if ((c & 0xffff) == c) {
+ tcg_out32(s, ANDI | SAI(src, dst, c));
+ return;
+ } else if ((c & 0xffff0000) == c) {
+ tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
+ return;
+ } else if (mask_operand(c, &mb, &me)) {
+ tcg_out_rlw(s, RLWINM, dst, src, 0, mb, me);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I32, 0, c);
+ tcg_out32(s, AND | SAB(src, dst, 0));
+ }
+}
+
static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
int op_lo, int op_hi)
{
@@ -1352,9 +1394,15 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
- case INDEX_op_and_i64:
case INDEX_op_and_i32:
if (const_args[2]) {
+ tcg_out_andi32(s, args[0], args[1], args[2]);
+ } else {
+ tcg_out32(s, AND | SAB(args[1], args[0], args[2]));
+ }
+ break;
+ case INDEX_op_and_i64:
+ if (const_args[2]) {
if ((args[2] & 0xffff) == args[2]) {
tcg_out32(s, ANDI | SAI(args[1], args[0], args[2]));
} else if ((args[2] & 0xffff0000) == args[2]) {
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index aa6a0f0..f1c3067 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -67,13 +67,15 @@ typedef enum {
#define TCG_TARGET_STACK_ALIGN 16
#define TCG_TARGET_CALL_STACK_OFFSET 48
+/* optional instructions automatically implemented */
+#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
+#define TCG_TARGET_HAS_ext16u_i32 0
+
/* optional instructions */
#define TCG_TARGET_HAS_div_i32 1
#define TCG_TARGET_HAS_rot_i32 0
#define TCG_TARGET_HAS_ext8s_i32 1
#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_ext8u_i32 0
-#define TCG_TARGET_HAS_ext16u_i32 0
#define TCG_TARGET_HAS_bswap16_i32 0
#define TCG_TARGET_HAS_bswap32_i32 0
#define TCG_TARGET_HAS_not_i32 1
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 15/33] tcg-ppc64: Improve and_i64 with constant
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (13 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 14/33] tcg-ppc64: Improve and_i32 with constant Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-13 12:24 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 16/33] tcg-ppc64: Use automatic implementation of ext32u_i64 Richard Henderson
` (18 subsequent siblings)
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Use RLDICL and RLDICR.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 64 +++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 50 insertions(+), 14 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 1b0563f..1bd456a 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -527,7 +527,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
}
}
-static inline bool mask_operand(uint32_t c, int *mb, int *me)
+static bool mask_operand(uint32_t c, int *mb, int *me)
{
uint32_t lsb, test;
@@ -551,6 +551,30 @@ static inline bool mask_operand(uint32_t c, int *mb, int *me)
return true;
}
+static bool mask64_operand(uint64_t c, int *mb, int *me)
+{
+ uint64_t lsb;
+
+ if (c == 0) {
+ return false;
+ }
+
+ lsb = c & -c;
+ /* Accept 1..10..0. */
+ if (c == -lsb) {
+ *mb = 0;
+ *me = clz64(lsb);
+ return true;
+ }
+ /* Accept 0..01..1. */
+ if (lsb == 1 && (c & (c + 1)) == 0) {
+ *mb = clz64(c + 1) + 1;
+ *me = 63;
+ return true;
+ }
+ return false;
+}
+
static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
{
int mb, me;
@@ -569,6 +593,28 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
}
}
+static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
+{
+ int mb, me;
+
+ if ((c & 0xffff) == c) {
+ tcg_out32(s, ANDI | SAI(src, dst, c));
+ return;
+ } else if ((c & 0xffff0000) == c) {
+ tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
+ return;
+ } else if (mask64_operand(c, &mb, &me)) {
+ if (mb == 0) {
+ tcg_out_rld(s, RLDICR, dst, src, 0, me);
+ } else {
+ tcg_out_rld(s, RLDICL, dst, src, 0, mb);
+ }
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, 0, c);
+ tcg_out32(s, AND | SAB(src, dst, 0));
+ }
+}
+
static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
int op_lo, int op_hi)
{
@@ -1403,20 +1449,10 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_and_i64:
if (const_args[2]) {
- if ((args[2] & 0xffff) == args[2]) {
- tcg_out32(s, ANDI | SAI(args[1], args[0], args[2]));
- } else if ((args[2] & 0xffff0000) == args[2]) {
- tcg_out32(s, ANDIS | SAI(args[1], args[0], args[2] >> 16));
- } else {
- tcg_out_movi (s, (opc == INDEX_op_and_i32
- ? TCG_TYPE_I32
- : TCG_TYPE_I64),
- 0, args[2]);
- tcg_out32 (s, AND | SAB (args[1], args[0], 0));
- }
+ tcg_out_andi64(s, args[0], args[1], args[2]);
+ } else {
+ tcg_out32(s, AND | SAB(args[1], args[0], args[2]));
}
- else
- tcg_out32 (s, AND | SAB (args[1], args[0], args[2]));
break;
case INDEX_op_or_i64:
case INDEX_op_or_i32:
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 15/33] tcg-ppc64: Improve and_i64 with constant
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 15/33] tcg-ppc64: Improve and_i64 " Richard Henderson
@ 2013-04-13 12:24 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-13 12:24 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:08PM -0500, Richard Henderson wrote:
> Use RLDICL and RLDICR.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 64 +++++++++++++++++++++++++++++++++++++++-----------
> 1 file changed, 50 insertions(+), 14 deletions(-)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index 1b0563f..1bd456a 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -527,7 +527,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
> }
> }
>
> -static inline bool mask_operand(uint32_t c, int *mb, int *me)
> +static bool mask_operand(uint32_t c, int *mb, int *me)
> {
> uint32_t lsb, test;
>
> @@ -551,6 +551,30 @@ static inline bool mask_operand(uint32_t c, int *mb, int *me)
> return true;
> }
>
> +static bool mask64_operand(uint64_t c, int *mb, int *me)
> +{
> + uint64_t lsb;
> +
> + if (c == 0) {
> + return false;
> + }
> +
> + lsb = c & -c;
> + /* Accept 1..10..0. */
> + if (c == -lsb) {
> + *mb = 0;
> + *me = clz64(lsb);
> + return true;
> + }
> + /* Accept 0..01..1. */
> + if (lsb == 1 && (c & (c + 1)) == 0) {
> + *mb = clz64(c + 1) + 1;
> + *me = 63;
> + return true;
> + }
> + return false;
> +}
> +
> static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
> {
> int mb, me;
> @@ -569,6 +593,28 @@ static void tcg_out_andi32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c)
> }
> }
>
> +static void tcg_out_andi64(TCGContext *s, TCGReg dst, TCGReg src, uint64_t c)
> +{
> + int mb, me;
> +
> + if ((c & 0xffff) == c) {
> + tcg_out32(s, ANDI | SAI(src, dst, c));
> + return;
> + } else if ((c & 0xffff0000) == c) {
> + tcg_out32(s, ANDIS | SAI(src, dst, c >> 16));
> + return;
> + } else if (mask64_operand(c, &mb, &me)) {
> + if (mb == 0) {
> + tcg_out_rld(s, RLDICR, dst, src, 0, me);
> + } else {
> + tcg_out_rld(s, RLDICL, dst, src, 0, mb);
> + }
> + } else {
> + tcg_out_movi(s, TCG_TYPE_I64, 0, c);
> + tcg_out32(s, AND | SAB(src, dst, 0));
> + }
> +}
> +
> static void tcg_out_zori32(TCGContext *s, TCGReg dst, TCGReg src, uint32_t c,
> int op_lo, int op_hi)
> {
> @@ -1403,20 +1449,10 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
> break;
> case INDEX_op_and_i64:
> if (const_args[2]) {
> - if ((args[2] & 0xffff) == args[2]) {
> - tcg_out32(s, ANDI | SAI(args[1], args[0], args[2]));
> - } else if ((args[2] & 0xffff0000) == args[2]) {
> - tcg_out32(s, ANDIS | SAI(args[1], args[0], args[2] >> 16));
> - } else {
> - tcg_out_movi (s, (opc == INDEX_op_and_i32
> - ? TCG_TYPE_I32
> - : TCG_TYPE_I64),
> - 0, args[2]);
> - tcg_out32 (s, AND | SAB (args[1], args[0], 0));
> - }
> + tcg_out_andi64(s, args[0], args[1], args[2]);
> + } else {
> + tcg_out32(s, AND | SAB(args[1], args[0], args[2]));
> }
> - else
> - tcg_out32 (s, AND | SAB (args[1], args[0], args[2]));
> break;
> case INDEX_op_or_i64:
> case INDEX_op_or_i32:
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 16/33] tcg-ppc64: Use automatic implementation of ext32u_i64
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (14 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 15/33] tcg-ppc64: Improve and_i64 " Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-13 12:25 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 17/33] tcg-ppc64: Streamline qemu_ld/st insn selection Richard Henderson
` (17 subsequent siblings)
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
The enhancements to the AND-immediate code obviate this.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 5 -----
tcg/ppc64/tcg-target.h | 6 +++---
2 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 1bd456a..71d72b4 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1686,10 +1686,6 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out32 (s, c | RS (args[1]) | RA (args[0]));
break;
- case INDEX_op_ext32u_i64:
- tcg_out_ext32u(s, args[0], args[1]);
- break;
-
case INDEX_op_setcond_i32:
tcg_out_setcond (s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
const_args[2]);
@@ -1796,7 +1792,6 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_ext8s_i64, { "r", "r" } },
{ INDEX_op_ext16s_i64, { "r", "r" } },
{ INDEX_op_ext32s_i64, { "r", "r" } },
- { INDEX_op_ext32u_i64, { "r", "r" } },
{ INDEX_op_setcond_i32, { "r", "r", "ri" } },
{ INDEX_op_setcond_i64, { "r", "r", "ri" } },
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index f1c3067..a4078ae 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -70,6 +70,9 @@ typedef enum {
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
#define TCG_TARGET_HAS_ext16u_i32 0
+#define TCG_TARGET_HAS_ext8u_i64 0
+#define TCG_TARGET_HAS_ext16u_i64 0
+#define TCG_TARGET_HAS_ext32u_i64 0
/* optional instructions */
#define TCG_TARGET_HAS_div_i32 1
@@ -97,9 +100,6 @@ typedef enum {
#define TCG_TARGET_HAS_ext8s_i64 1
#define TCG_TARGET_HAS_ext16s_i64 1
#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_ext8u_i64 0
-#define TCG_TARGET_HAS_ext16u_i64 0
-#define TCG_TARGET_HAS_ext32u_i64 1
#define TCG_TARGET_HAS_bswap16_i64 0
#define TCG_TARGET_HAS_bswap32_i64 0
#define TCG_TARGET_HAS_bswap64_i64 0
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 16/33] tcg-ppc64: Use automatic implementation of ext32u_i64
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 16/33] tcg-ppc64: Use automatic implementation of ext32u_i64 Richard Henderson
@ 2013-04-13 12:25 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-13 12:25 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:09PM -0500, Richard Henderson wrote:
> The enhancements to the AND-immediate code obviate this.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 5 -----
> tcg/ppc64/tcg-target.h | 6 +++---
> 2 files changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index 1bd456a..71d72b4 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -1686,10 +1686,6 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
> tcg_out32 (s, c | RS (args[1]) | RA (args[0]));
> break;
>
> - case INDEX_op_ext32u_i64:
> - tcg_out_ext32u(s, args[0], args[1]);
> - break;
> -
> case INDEX_op_setcond_i32:
> tcg_out_setcond (s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
> const_args[2]);
> @@ -1796,7 +1792,6 @@ static const TCGTargetOpDef ppc_op_defs[] = {
> { INDEX_op_ext8s_i64, { "r", "r" } },
> { INDEX_op_ext16s_i64, { "r", "r" } },
> { INDEX_op_ext32s_i64, { "r", "r" } },
> - { INDEX_op_ext32u_i64, { "r", "r" } },
>
> { INDEX_op_setcond_i32, { "r", "r", "ri" } },
> { INDEX_op_setcond_i64, { "r", "r", "ri" } },
> diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
> index f1c3067..a4078ae 100644
> --- a/tcg/ppc64/tcg-target.h
> +++ b/tcg/ppc64/tcg-target.h
> @@ -70,6 +70,9 @@ typedef enum {
> /* optional instructions automatically implemented */
> #define TCG_TARGET_HAS_ext8u_i32 0 /* andi */
> #define TCG_TARGET_HAS_ext16u_i32 0
> +#define TCG_TARGET_HAS_ext8u_i64 0
> +#define TCG_TARGET_HAS_ext16u_i64 0
> +#define TCG_TARGET_HAS_ext32u_i64 0
>
> /* optional instructions */
> #define TCG_TARGET_HAS_div_i32 1
> @@ -97,9 +100,6 @@ typedef enum {
> #define TCG_TARGET_HAS_ext8s_i64 1
> #define TCG_TARGET_HAS_ext16s_i64 1
> #define TCG_TARGET_HAS_ext32s_i64 1
> -#define TCG_TARGET_HAS_ext8u_i64 0
> -#define TCG_TARGET_HAS_ext16u_i64 0
> -#define TCG_TARGET_HAS_ext32u_i64 1
> #define TCG_TARGET_HAS_bswap16_i64 0
> #define TCG_TARGET_HAS_bswap32_i64 0
> #define TCG_TARGET_HAS_bswap64_i64 0
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 17/33] tcg-ppc64: Streamline qemu_ld/st insn selection
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (15 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 16/33] tcg-ppc64: Use automatic implementation of ext32u_i64 Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-13 12:25 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 18/33] tcg-ppc64: Implement rotates Richard Henderson
` (16 subsequent siblings)
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Use a table to look up insns of the right width and sign.
Include support for the Power 2.06 LDBRX and STDBRX insns.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 166 +++++++++++++++++--------------------------------
1 file changed, 56 insertions(+), 110 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 71d72b4..0cb1667 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -44,6 +44,8 @@ static uint8_t *tb_ret_addr;
#define GUEST_BASE 0
#endif
+#define HAVE_ISA_2_06 0
+
#ifdef CONFIG_USE_GUEST_BASE
#define TCG_GUEST_BASE_REG 30
#else
@@ -368,8 +370,10 @@ static int tcg_target_const_match (tcg_target_long val,
#define CMPL XO31( 32)
#define LHBRX XO31(790)
#define LWBRX XO31(534)
+#define LDBRX XO31(532)
#define STHBRX XO31(918)
#define STWBRX XO31(662)
+#define STDBRX XO31(660)
#define MFSPR XO31(339)
#define MTSPR XO31(467)
#define SRAWI XO31(824)
@@ -759,22 +763,44 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
}
#endif
+static const uint32_t qemu_ldx_opc[8] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+ LBZX, LHZX, LWZX, LDX,
+ 0, LHAX, LWAX, LDX
+#else
+ LBZX, LHBRX, LWBRX, LDBRX,
+ 0, 0, 0, LDBRX,
+#endif
+};
+
+static const uint32_t qemu_stx_opc[4] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+ STBX, STHX, STWX, STDX
+#else
+ STBX, STHBRX, STWBRX, STDBRX,
+#endif
+};
+
+static const uint32_t qemu_exts_opc[4] = {
+ EXTSB, EXTSH, EXTSW, 0
+};
+
static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
{
TCGReg addr_reg, data_reg, r0, r1, rbase;
- int bswap;
+ uint32_t insn, s_bits;
#ifdef CONFIG_SOFTMMU
TCGReg r2, ir;
- int mem_index, s_bits;
+ int mem_index;
void *label1_ptr, *label2_ptr;
#endif
data_reg = *args++;
addr_reg = *args++;
+ s_bits = opc & 3;
#ifdef CONFIG_SOFTMMU
mem_index = *args;
- s_bits = opc & 3;
r0 = 3;
r1 = 4;
@@ -799,23 +825,11 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
- switch (opc) {
- case 0|4:
- tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
- break;
- case 1|4:
- tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
- break;
- case 2|4:
- tcg_out32 (s, EXTSW | RA (data_reg) | RS (3));
- break;
- case 0:
- case 1:
- case 2:
- case 3:
- if (data_reg != 3)
- tcg_out_mov (s, TCG_TYPE_I64, data_reg, 3);
- break;
+ if (opc & 4) {
+ insn = qemu_exts_opc[s_bits];
+ tcg_out32(s, insn | RA(data_reg) | RS(3));
+ } else if (data_reg != 3) {
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3);
}
label2_ptr = s->code_ptr;
tcg_out32 (s, B);
@@ -842,65 +856,19 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
#endif
-#ifdef TARGET_WORDS_BIGENDIAN
- bswap = 0;
-#else
- bswap = 1;
-#endif
- switch (opc) {
- default:
- case 0:
- tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0));
- break;
- case 0|4:
- tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0));
- tcg_out32 (s, EXTSB | RA (data_reg) | RS (data_reg));
- break;
- case 1:
- if (bswap)
- tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0));
- else
- tcg_out32 (s, LHZX | TAB (data_reg, rbase, r0));
- break;
- case 1|4:
- if (bswap) {
- tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0));
- tcg_out32 (s, EXTSH | RA (data_reg) | RS (data_reg));
- }
- else tcg_out32 (s, LHAX | TAB (data_reg, rbase, r0));
- break;
- case 2:
- if (bswap)
- tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0));
- else
- tcg_out32 (s, LWZX | TAB (data_reg, rbase, r0));
- break;
- case 2|4:
- if (bswap) {
- tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0));
- tcg_out32 (s, EXTSW | RA (data_reg) | RS (data_reg));
- }
- else tcg_out32 (s, LWAX | TAB (data_reg, rbase, r0));
- break;
- case 3:
-#ifdef CONFIG_USE_GUEST_BASE
- if (bswap) {
- tcg_out32(s, ADDI | TAI(r1, r0, 4));
- tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0));
- tcg_out32 (s, LWBRX | TAB ( r1, rbase, r1));
- tcg_out_rld (s, RLDIMI, data_reg, r1, 32, 0);
- }
- else tcg_out32 (s, LDX | TAB (data_reg, rbase, r0));
-#else
- if (bswap) {
- tcg_out_movi32 (s, 0, 4);
- tcg_out32 (s, LWBRX | RT (data_reg) | RB (r0));
- tcg_out32 (s, LWBRX | RT ( r1) | RA (r0));
- tcg_out_rld (s, RLDIMI, data_reg, r1, 32, 0);
- }
- else tcg_out32 (s, LD | RT (data_reg) | RA (r0));
-#endif
- break;
+ insn = qemu_ldx_opc[opc];
+ if (!HAVE_ISA_2_06 && insn == LDBRX) {
+ tcg_out32(s, ADDI | TAI(r1, r0, 4));
+ tcg_out32(s, LWBRX | TAB(data_reg, rbase, r0));
+ tcg_out32(s, LWBRX | TAB( r1, rbase, r1));
+ tcg_out_rld(s, RLDIMI, data_reg, r1, 32, 0);
+ } else if (insn) {
+ tcg_out32(s, insn | TAB(data_reg, rbase, r0));
+ } else {
+ insn = qemu_ldx_opc[s_bits];
+ tcg_out32(s, insn | TAB(data_reg, rbase, r0));
+ insn = qemu_exts_opc[s_bits];
+ tcg_out32 (s, insn | RA(data_reg) | RS(data_reg));
}
#ifdef CONFIG_SOFTMMU
@@ -911,7 +879,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
{
TCGReg addr_reg, r0, r1, rbase, data_reg;
- int bswap;
+ uint32_t insn;
#ifdef CONFIG_SOFTMMU
TCGReg r2, ir;
int mem_index;
@@ -975,36 +943,14 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
#endif
-#ifdef TARGET_WORDS_BIGENDIAN
- bswap = 0;
-#else
- bswap = 1;
-#endif
- switch (opc) {
- case 0:
- tcg_out32 (s, STBX | SAB (data_reg, rbase, r0));
- break;
- case 1:
- if (bswap)
- tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0));
- else
- tcg_out32 (s, STHX | SAB (data_reg, rbase, r0));
- break;
- case 2:
- if (bswap)
- tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
- else
- tcg_out32 (s, STWX | SAB (data_reg, rbase, r0));
- break;
- case 3:
- if (bswap) {
- tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
- tcg_out32(s, ADDI | TAI(r1, r0, 4));
- tcg_out_shri64(s, 0, data_reg, 32);
- tcg_out32 (s, STWBRX | SAB (0, rbase, r1));
- }
- else tcg_out32 (s, STDX | SAB (data_reg, rbase, r0));
- break;
+ insn = qemu_stx_opc[opc];
+ if (!HAVE_ISA_2_06 && insn == STDBRX) {
+ tcg_out32(s, STWBRX | SAB(data_reg, rbase, r0));
+ tcg_out32(s, ADDI | TAI(r1, r0, 4));
+ tcg_out_shri64(s, 0, data_reg, 32);
+ tcg_out32(s, STWBRX | SAB(0, rbase, r1));
+ } else {
+ tcg_out32(s, insn | SAB(data_reg, rbase, r0));
}
#ifdef CONFIG_SOFTMMU
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 17/33] tcg-ppc64: Streamline qemu_ld/st insn selection
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 17/33] tcg-ppc64: Streamline qemu_ld/st insn selection Richard Henderson
@ 2013-04-13 12:25 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-13 12:25 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:10PM -0500, Richard Henderson wrote:
> Use a table to look up insns of the right width and sign.
> Include support for the Power 2.06 LDBRX and STDBRX insns.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 166 +++++++++++++++++--------------------------------
> 1 file changed, 56 insertions(+), 110 deletions(-)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index 71d72b4..0cb1667 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -44,6 +44,8 @@ static uint8_t *tb_ret_addr;
> #define GUEST_BASE 0
> #endif
>
> +#define HAVE_ISA_2_06 0
> +
> #ifdef CONFIG_USE_GUEST_BASE
> #define TCG_GUEST_BASE_REG 30
> #else
> @@ -368,8 +370,10 @@ static int tcg_target_const_match (tcg_target_long val,
> #define CMPL XO31( 32)
> #define LHBRX XO31(790)
> #define LWBRX XO31(534)
> +#define LDBRX XO31(532)
> #define STHBRX XO31(918)
> #define STWBRX XO31(662)
> +#define STDBRX XO31(660)
> #define MFSPR XO31(339)
> #define MTSPR XO31(467)
> #define SRAWI XO31(824)
> @@ -759,22 +763,44 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
> }
> #endif
>
> +static const uint32_t qemu_ldx_opc[8] = {
> +#ifdef TARGET_WORDS_BIGENDIAN
> + LBZX, LHZX, LWZX, LDX,
> + 0, LHAX, LWAX, LDX
> +#else
> + LBZX, LHBRX, LWBRX, LDBRX,
> + 0, 0, 0, LDBRX,
> +#endif
> +};
> +
> +static const uint32_t qemu_stx_opc[4] = {
> +#ifdef TARGET_WORDS_BIGENDIAN
> + STBX, STHX, STWX, STDX
> +#else
> + STBX, STHBRX, STWBRX, STDBRX,
> +#endif
> +};
> +
> +static const uint32_t qemu_exts_opc[4] = {
> + EXTSB, EXTSH, EXTSW, 0
> +};
> +
> static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
> {
> TCGReg addr_reg, data_reg, r0, r1, rbase;
> - int bswap;
> + uint32_t insn, s_bits;
> #ifdef CONFIG_SOFTMMU
> TCGReg r2, ir;
> - int mem_index, s_bits;
> + int mem_index;
> void *label1_ptr, *label2_ptr;
> #endif
>
> data_reg = *args++;
> addr_reg = *args++;
> + s_bits = opc & 3;
>
> #ifdef CONFIG_SOFTMMU
> mem_index = *args;
> - s_bits = opc & 3;
>
> r0 = 3;
> r1 = 4;
> @@ -799,23 +825,11 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
>
> tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
>
> - switch (opc) {
> - case 0|4:
> - tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
> - break;
> - case 1|4:
> - tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
> - break;
> - case 2|4:
> - tcg_out32 (s, EXTSW | RA (data_reg) | RS (3));
> - break;
> - case 0:
> - case 1:
> - case 2:
> - case 3:
> - if (data_reg != 3)
> - tcg_out_mov (s, TCG_TYPE_I64, data_reg, 3);
> - break;
> + if (opc & 4) {
> + insn = qemu_exts_opc[s_bits];
> + tcg_out32(s, insn | RA(data_reg) | RS(3));
> + } else if (data_reg != 3) {
> + tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3);
> }
> label2_ptr = s->code_ptr;
> tcg_out32 (s, B);
> @@ -842,65 +856,19 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
> rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
> #endif
>
> -#ifdef TARGET_WORDS_BIGENDIAN
> - bswap = 0;
> -#else
> - bswap = 1;
> -#endif
> - switch (opc) {
> - default:
> - case 0:
> - tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0));
> - break;
> - case 0|4:
> - tcg_out32 (s, LBZX | TAB (data_reg, rbase, r0));
> - tcg_out32 (s, EXTSB | RA (data_reg) | RS (data_reg));
> - break;
> - case 1:
> - if (bswap)
> - tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0));
> - else
> - tcg_out32 (s, LHZX | TAB (data_reg, rbase, r0));
> - break;
> - case 1|4:
> - if (bswap) {
> - tcg_out32 (s, LHBRX | TAB (data_reg, rbase, r0));
> - tcg_out32 (s, EXTSH | RA (data_reg) | RS (data_reg));
> - }
> - else tcg_out32 (s, LHAX | TAB (data_reg, rbase, r0));
> - break;
> - case 2:
> - if (bswap)
> - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0));
> - else
> - tcg_out32 (s, LWZX | TAB (data_reg, rbase, r0));
> - break;
> - case 2|4:
> - if (bswap) {
> - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0));
> - tcg_out32 (s, EXTSW | RA (data_reg) | RS (data_reg));
> - }
> - else tcg_out32 (s, LWAX | TAB (data_reg, rbase, r0));
> - break;
> - case 3:
> -#ifdef CONFIG_USE_GUEST_BASE
> - if (bswap) {
> - tcg_out32(s, ADDI | TAI(r1, r0, 4));
> - tcg_out32 (s, LWBRX | TAB (data_reg, rbase, r0));
> - tcg_out32 (s, LWBRX | TAB ( r1, rbase, r1));
> - tcg_out_rld (s, RLDIMI, data_reg, r1, 32, 0);
> - }
> - else tcg_out32 (s, LDX | TAB (data_reg, rbase, r0));
> -#else
> - if (bswap) {
> - tcg_out_movi32 (s, 0, 4);
> - tcg_out32 (s, LWBRX | RT (data_reg) | RB (r0));
> - tcg_out32 (s, LWBRX | RT ( r1) | RA (r0));
> - tcg_out_rld (s, RLDIMI, data_reg, r1, 32, 0);
> - }
> - else tcg_out32 (s, LD | RT (data_reg) | RA (r0));
> -#endif
> - break;
> + insn = qemu_ldx_opc[opc];
> + if (!HAVE_ISA_2_06 && insn == LDBRX) {
> + tcg_out32(s, ADDI | TAI(r1, r0, 4));
> + tcg_out32(s, LWBRX | TAB(data_reg, rbase, r0));
> + tcg_out32(s, LWBRX | TAB( r1, rbase, r1));
> + tcg_out_rld(s, RLDIMI, data_reg, r1, 32, 0);
> + } else if (insn) {
> + tcg_out32(s, insn | TAB(data_reg, rbase, r0));
> + } else {
> + insn = qemu_ldx_opc[s_bits];
> + tcg_out32(s, insn | TAB(data_reg, rbase, r0));
> + insn = qemu_exts_opc[s_bits];
> + tcg_out32 (s, insn | RA(data_reg) | RS(data_reg));
> }
>
> #ifdef CONFIG_SOFTMMU
> @@ -911,7 +879,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
> static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
> {
> TCGReg addr_reg, r0, r1, rbase, data_reg;
> - int bswap;
> + uint32_t insn;
> #ifdef CONFIG_SOFTMMU
> TCGReg r2, ir;
> int mem_index;
> @@ -975,36 +943,14 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
> rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
> #endif
>
> -#ifdef TARGET_WORDS_BIGENDIAN
> - bswap = 0;
> -#else
> - bswap = 1;
> -#endif
> - switch (opc) {
> - case 0:
> - tcg_out32 (s, STBX | SAB (data_reg, rbase, r0));
> - break;
> - case 1:
> - if (bswap)
> - tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0));
> - else
> - tcg_out32 (s, STHX | SAB (data_reg, rbase, r0));
> - break;
> - case 2:
> - if (bswap)
> - tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
> - else
> - tcg_out32 (s, STWX | SAB (data_reg, rbase, r0));
> - break;
> - case 3:
> - if (bswap) {
> - tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
> - tcg_out32(s, ADDI | TAI(r1, r0, 4));
> - tcg_out_shri64(s, 0, data_reg, 32);
> - tcg_out32 (s, STWBRX | SAB (0, rbase, r1));
> - }
> - else tcg_out32 (s, STDX | SAB (data_reg, rbase, r0));
> - break;
> + insn = qemu_stx_opc[opc];
> + if (!HAVE_ISA_2_06 && insn == STDBRX) {
> + tcg_out32(s, STWBRX | SAB(data_reg, rbase, r0));
> + tcg_out32(s, ADDI | TAI(r1, r0, 4));
> + tcg_out_shri64(s, 0, data_reg, 32);
> + tcg_out32(s, STWBRX | SAB(0, rbase, r1));
> + } else {
> + tcg_out32(s, insn | SAB(data_reg, rbase, r0));
> }
>
> #ifdef CONFIG_SOFTMMU
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 18/33] tcg-ppc64: Implement rotates
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (16 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 17/33] tcg-ppc64: Streamline qemu_ld/st insn selection Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 19/33] tcg-ppc64: Implement bswap16 and bswap32 Richard Henderson
` (15 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 39 +++++++++++++++++++++++++++++++++++++++
tcg/ppc64/tcg-target.h | 4 ++--
2 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 0cb1667..18338a2 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -336,11 +336,14 @@ static int tcg_target_const_match (tcg_target_long val,
#define LWZU OPCD( 33)
#define STWU OPCD( 37)
+#define RLWIMI OPCD( 20)
#define RLWINM OPCD( 21)
+#define RLWNM OPCD( 23)
#define RLDICL XO30( 0)
#define RLDICR XO30( 1)
#define RLDIMI XO30( 3)
+#define RLDCL XO30( 8)
#define BCLR XO19( 16)
#define BCCTR XO19(528)
@@ -1473,6 +1476,23 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
else
tcg_out32 (s, SRAW | SAB (args[1], args[0], args[2]));
break;
+ case INDEX_op_rotl_i32:
+ if (const_args[2]) {
+ tcg_out_rlw(s, RLWINM, args[0], args[1], args[2], 0, 31);
+ } else {
+ tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
+ | MB(0) | ME(31));
+ }
+ break;
+ case INDEX_op_rotr_i32:
+ if (const_args[2]) {
+ tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(0, args[2], 32));
+ tcg_out32(s, RLWNM | SAB(args[1], args[0], 0)
+ | MB(0) | ME(31));
+ }
+ break;
case INDEX_op_brcond_i32:
tcg_out_brcond (s, args[2], args[0], args[1], const_args[1], args[3], 0);
@@ -1561,6 +1581,21 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
else
tcg_out32 (s, SRAD | SAB (args[1], args[0], args[2]));
break;
+ case INDEX_op_rotl_i64:
+ if (const_args[2]) {
+ tcg_out_rld(s, RLDICL, args[0], args[1], args[2], 0);
+ } else {
+ tcg_out32(s, RLDCL | SAB(args[1], args[0], args[2]) | MB64(0));
+ }
+ break;
+ case INDEX_op_rotr_i64:
+ if (const_args[2]) {
+ tcg_out_rld(s, RLDICL, args[0], args[1], 64 - args[2], 0);
+ } else {
+ tcg_out32(s, SUBFIC | TAI(0, args[2], 64));
+ tcg_out32(s, RLDCL | SAB(args[1], args[0], 0) | MB64(0));
+ }
+ break;
case INDEX_op_mul_i64:
tcg_out32 (s, MULLD | TAB (args[0], args[1], args[2]));
@@ -1693,6 +1728,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_shl_i32, { "r", "r", "ri" } },
{ INDEX_op_shr_i32, { "r", "r", "ri" } },
{ INDEX_op_sar_i32, { "r", "r", "ri" } },
+ { INDEX_op_rotl_i32, { "r", "r", "ri" } },
+ { INDEX_op_rotr_i32, { "r", "r", "ri" } },
{ INDEX_op_brcond_i32, { "r", "ri" } },
{ INDEX_op_brcond_i64, { "r", "ri" } },
@@ -1709,6 +1746,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_shl_i64, { "r", "r", "ri" } },
{ INDEX_op_shr_i64, { "r", "r", "ri" } },
{ INDEX_op_sar_i64, { "r", "r", "ri" } },
+ { INDEX_op_rotl_i64, { "r", "r", "ri" } },
+ { INDEX_op_rotr_i64, { "r", "r", "ri" } },
{ INDEX_op_mul_i64, { "r", "r", "r" } },
{ INDEX_op_div_i64, { "r", "r", "r" } },
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index a4078ae..b2713a0 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -76,7 +76,7 @@ typedef enum {
/* optional instructions */
#define TCG_TARGET_HAS_div_i32 1
-#define TCG_TARGET_HAS_rot_i32 0
+#define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_ext8s_i32 1
#define TCG_TARGET_HAS_ext16s_i32 1
#define TCG_TARGET_HAS_bswap16_i32 0
@@ -96,7 +96,7 @@ typedef enum {
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_div_i64 1
-#define TCG_TARGET_HAS_rot_i64 0
+#define TCG_TARGET_HAS_rot_i64 1
#define TCG_TARGET_HAS_ext8s_i64 1
#define TCG_TARGET_HAS_ext16s_i64 1
#define TCG_TARGET_HAS_ext32s_i64 1
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 19/33] tcg-ppc64: Implement bswap16 and bswap32
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (17 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 18/33] tcg-ppc64: Implement rotates Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-15 7:59 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 20/33] tcg-ppc64: Implement bswap64 Richard Henderson
` (14 subsequent siblings)
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 43 +++++++++++++++++++++++++++++++++++++++++++
tcg/ppc64/tcg-target.h | 8 ++++----
2 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 18338a2..ee035fd 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1676,6 +1676,44 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
const_args[2]);
break;
+ case INDEX_op_bswap16_i32:
+ case INDEX_op_bswap16_i64:
+ a0 = args[0], a1 = args[1];
+ /* a1 = abcd */
+ if (a0 != a1) {
+ /* a0 = (a1 r<< 24) & 0xff # 000c */
+ tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
+ /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
+ tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
+ } else {
+ /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
+ tcg_out_rlw(s, RLWINM, 0, a1, 8, 16, 23);
+ /* a0 = (a1 r<< 24) & 0xff # 000c */
+ tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
+ /* a0 = a0 | r0 # 00dc */
+ tcg_out32(s, OR | SAB(0, a0, a0));
+ }
+ break;
+
+ case INDEX_op_bswap32_i32:
+ case INDEX_op_bswap32_i64:
+ /* Stolen from gcc's builtin_bswap32 */
+ a1 = args[1];
+ a0 = args[0] == a1 ? 0 : args[0];
+
+ /* a1 = args[1] # abcd */
+ /* a0 = rotate_left (a1, 8) # bcda */
+ tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
+ /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
+ tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
+ /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
+ tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
+
+ if (!a0) {
+ tcg_out_mov(s, TCG_TYPE_I64, args[0], a0);
+ }
+ break;
+
default:
tcg_dump_ops (s);
tcg_abort ();
@@ -1781,6 +1819,11 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_setcond_i32, { "r", "r", "ri" } },
{ INDEX_op_setcond_i64, { "r", "r", "ri" } },
+ { INDEX_op_bswap16_i32, { "r", "r" } },
+ { INDEX_op_bswap16_i64, { "r", "r" } },
+ { INDEX_op_bswap32_i32, { "r", "r" } },
+ { INDEX_op_bswap32_i64, { "r", "r" } },
+
{ -1 },
};
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index b2713a0..7cd1e98 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -79,8 +79,8 @@ typedef enum {
#define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_ext8s_i32 1
#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_bswap16_i32 0
-#define TCG_TARGET_HAS_bswap32_i32 0
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
#define TCG_TARGET_HAS_not_i32 1
#define TCG_TARGET_HAS_neg_i32 1
#define TCG_TARGET_HAS_andc_i32 0
@@ -100,8 +100,8 @@ typedef enum {
#define TCG_TARGET_HAS_ext8s_i64 1
#define TCG_TARGET_HAS_ext16s_i64 1
#define TCG_TARGET_HAS_ext32s_i64 1
-#define TCG_TARGET_HAS_bswap16_i64 0
-#define TCG_TARGET_HAS_bswap32_i64 0
+#define TCG_TARGET_HAS_bswap16_i64 1
+#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 0
#define TCG_TARGET_HAS_not_i64 1
#define TCG_TARGET_HAS_neg_i64 1
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 19/33] tcg-ppc64: Implement bswap16 and bswap32
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 19/33] tcg-ppc64: Implement bswap16 and bswap32 Richard Henderson
@ 2013-04-15 7:59 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-15 7:59 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:12PM -0500, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 43 +++++++++++++++++++++++++++++++++++++++++++
> tcg/ppc64/tcg-target.h | 8 ++++----
> 2 files changed, 47 insertions(+), 4 deletions(-)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index 18338a2..ee035fd 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -1676,6 +1676,44 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
> const_args[2]);
> break;
>
> + case INDEX_op_bswap16_i32:
> + case INDEX_op_bswap16_i64:
> + a0 = args[0], a1 = args[1];
> + /* a1 = abcd */
> + if (a0 != a1) {
> + /* a0 = (a1 r<< 24) & 0xff # 000c */
> + tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
> + /* a0 = (a0 & ~0xff00) | (a1 r<< 8) & 0xff00 # 00dc */
> + tcg_out_rlw(s, RLWIMI, a0, a1, 8, 16, 23);
> + } else {
> + /* r0 = (a1 r<< 8) & 0xff00 # 00d0 */
> + tcg_out_rlw(s, RLWINM, 0, a1, 8, 16, 23);
> + /* a0 = (a1 r<< 24) & 0xff # 000c */
> + tcg_out_rlw(s, RLWINM, a0, a1, 24, 24, 31);
> + /* a0 = a0 | r0 # 00dc */
> + tcg_out32(s, OR | SAB(0, a0, a0));
> + }
> + break;
> +
> + case INDEX_op_bswap32_i32:
> + case INDEX_op_bswap32_i64:
> + /* Stolen from gcc's builtin_bswap32 */
> + a1 = args[1];
> + a0 = args[0] == a1 ? 0 : args[0];
> +
> + /* a1 = args[1] # abcd */
> + /* a0 = rotate_left (a1, 8) # bcda */
> + tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
> + /* a0 = (a0 & ~0xff000000) | ((a1 r<< 24) & 0xff000000) # dcda */
> + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
> + /* a0 = (a0 & ~0x0000ff00) | ((a1 r<< 24) & 0x0000ff00) # dcba */
> + tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
> +
> + if (!a0) {
> + tcg_out_mov(s, TCG_TYPE_I64, args[0], a0);
> + }
> + break;
> +
It would make the code easier to read with TCG_REG_R0 instead of using
the zero value directly, especially for the if case.
> default:
> tcg_dump_ops (s);
> tcg_abort ();
> @@ -1781,6 +1819,11 @@ static const TCGTargetOpDef ppc_op_defs[] = {
> { INDEX_op_setcond_i32, { "r", "r", "ri" } },
> { INDEX_op_setcond_i64, { "r", "r", "ri" } },
>
> + { INDEX_op_bswap16_i32, { "r", "r" } },
> + { INDEX_op_bswap16_i64, { "r", "r" } },
> + { INDEX_op_bswap32_i32, { "r", "r" } },
> + { INDEX_op_bswap32_i64, { "r", "r" } },
> +
> { -1 },
> };
>
> diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
> index b2713a0..7cd1e98 100644
> --- a/tcg/ppc64/tcg-target.h
> +++ b/tcg/ppc64/tcg-target.h
> @@ -79,8 +79,8 @@ typedef enum {
> #define TCG_TARGET_HAS_rot_i32 1
> #define TCG_TARGET_HAS_ext8s_i32 1
> #define TCG_TARGET_HAS_ext16s_i32 1
> -#define TCG_TARGET_HAS_bswap16_i32 0
> -#define TCG_TARGET_HAS_bswap32_i32 0
> +#define TCG_TARGET_HAS_bswap16_i32 1
> +#define TCG_TARGET_HAS_bswap32_i32 1
> #define TCG_TARGET_HAS_not_i32 1
> #define TCG_TARGET_HAS_neg_i32 1
> #define TCG_TARGET_HAS_andc_i32 0
> @@ -100,8 +100,8 @@ typedef enum {
> #define TCG_TARGET_HAS_ext8s_i64 1
> #define TCG_TARGET_HAS_ext16s_i64 1
> #define TCG_TARGET_HAS_ext32s_i64 1
> -#define TCG_TARGET_HAS_bswap16_i64 0
> -#define TCG_TARGET_HAS_bswap32_i64 0
> +#define TCG_TARGET_HAS_bswap16_i64 1
> +#define TCG_TARGET_HAS_bswap32_i64 1
> #define TCG_TARGET_HAS_bswap64_i64 0
> #define TCG_TARGET_HAS_not_i64 1
> #define TCG_TARGET_HAS_neg_i64 1
That said, that's only a minor nitpick, so
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 20/33] tcg-ppc64: Implement bswap64
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (18 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 19/33] tcg-ppc64: Implement bswap16 and bswap32 Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 21/33] tcg-ppc64: Implement compound logicals Richard Henderson
` (13 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 35 +++++++++++++++++++++++++++++++++++
tcg/ppc64/tcg-target.h | 2 +-
2 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index ee035fd..1352852 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1714,6 +1714,40 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
+ case INDEX_op_bswap64_i64:
+ a0 = args[0], a1 = args[1], a2 = 0;
+ if (a0 == a1) {
+ a0 = 0;
+ a2 = a1;
+ }
+
+ /* a1 = # abcd efgh */
+ /* a0 = rl32(a1, 8) # 0000 fghe */
+ tcg_out_rlw(s, RLWINM, a0, a1, 8, 0, 31);
+ /* a0 = dep(a0, rl32(a1, 24), 0xff000000) # 0000 hghe */
+ tcg_out_rlw(s, RLWIMI, a0, a1, 24, 0, 7);
+ /* a0 = dep(a0, rl32(a1, 24), 0x0000ff00) # 0000 hgfe */
+ tcg_out_rlw(s, RLWIMI, a0, a1, 24, 16, 23);
+
+ /* a0 = rl64(a0, 32) # hgfe 0000 */
+ /* a2 = rl64(a1, 32) # efgh abcd */
+ tcg_out_rld(s, RLDICL, a0, a0, 32, 0);
+ tcg_out_rld(s, RLDICL, a2, a1, 32, 0);
+
+ /* a0 = dep(a0, rl32(a2, 8), 0xffffffff) # hgfe bcda */
+ tcg_out_rlw(s, RLWIMI, a0, a2, 8, 0, 31);
+ /* a0 = dep(a0, rl32(a2, 24), 0xff000000) # hgfe dcda */
+ tcg_out_rlw(s, RLWIMI, a0, a2, 24, 0, 7);
+ /* a0 = dep(a0, rl32(a2, 24), 0x0000ff00) # hgfe dcba */
+ tcg_out_rlw(s, RLWIMI, a0, a2, 24, 16, 23);
+
+ if (a0 == 0) {
+ tcg_out_mov(s, TCG_TYPE_I64, args[0], a0);
+ /* Revert the source rotate that we performed above. */
+ tcg_out_rld(s, RLDICL, a1, a1, 32, 0);
+ }
+ break;
+
default:
tcg_dump_ops (s);
tcg_abort ();
@@ -1823,6 +1857,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_bswap16_i64, { "r", "r" } },
{ INDEX_op_bswap32_i32, { "r", "r" } },
{ INDEX_op_bswap32_i64, { "r", "r" } },
+ { INDEX_op_bswap64_i64, { "r", "r" } },
{ -1 },
};
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 7cd1e98..76001e8 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -102,7 +102,7 @@ typedef enum {
#define TCG_TARGET_HAS_ext32s_i64 1
#define TCG_TARGET_HAS_bswap16_i64 1
#define TCG_TARGET_HAS_bswap32_i64 1
-#define TCG_TARGET_HAS_bswap64_i64 0
+#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_not_i64 1
#define TCG_TARGET_HAS_neg_i64 1
#define TCG_TARGET_HAS_andc_i64 0
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 21/33] tcg-ppc64: Implement compound logicals
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (19 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 20/33] tcg-ppc64: Implement bswap64 Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 22/33] tcg-ppc64: Handle constant inputs for some " Richard Henderson
` (12 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Mostly copied from the ppc32 port.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 34 ++++++++++++++++++++++++++++++++++
tcg/ppc64/tcg-target.h | 20 ++++++++++----------
2 files changed, 44 insertions(+), 10 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 1352852..89832e5 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -385,6 +385,10 @@ static int tcg_target_const_match (tcg_target_long val,
#define NOR XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
+#define ANDC XO31( 60)
+#define ORC XO31(412)
+#define EQV XO31(284)
+#define NAND XO31(476)
#define MULLD XO31(233)
#define MULHD XO31( 73)
@@ -1421,6 +1425,26 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out32(s, XOR | SAB(a1, a0, a2));
}
break;
+ case INDEX_op_andc_i32:
+ case INDEX_op_andc_i64:
+ tcg_out32(s, ANDC | SAB(args[1], args[0], args[2]));
+ break;
+ case INDEX_op_orc_i32:
+ case INDEX_op_orc_i64:
+ tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
+ break;
+ case INDEX_op_eqv_i32:
+ case INDEX_op_eqv_i64:
+ tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
+ break;
+ case INDEX_op_nand_i32:
+ case INDEX_op_nand_i64:
+ tcg_out32(s, NAND | SAB(args[1], args[0], args[2]));
+ break;
+ case INDEX_op_nor_i32:
+ case INDEX_op_nor_i64:
+ tcg_out32(s, NOR | SAB(args[1], args[0], args[2]));
+ break;
case INDEX_op_mul_i32:
if (const_args[2]) {
@@ -1796,6 +1820,11 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_and_i32, { "r", "r", "ri" } },
{ INDEX_op_or_i32, { "r", "r", "ri" } },
{ INDEX_op_xor_i32, { "r", "r", "ri" } },
+ { INDEX_op_andc_i32, { "r", "r", "r" } },
+ { INDEX_op_orc_i32, { "r", "r", "r" } },
+ { INDEX_op_eqv_i32, { "r", "r", "r" } },
+ { INDEX_op_nand_i32, { "r", "r", "r" } },
+ { INDEX_op_nor_i32, { "r", "r", "r" } },
{ INDEX_op_shl_i32, { "r", "r", "ri" } },
{ INDEX_op_shr_i32, { "r", "r", "ri" } },
@@ -1814,6 +1843,11 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_and_i64, { "r", "r", "rU" } },
{ INDEX_op_or_i64, { "r", "r", "rU" } },
{ INDEX_op_xor_i64, { "r", "r", "rU" } },
+ { INDEX_op_andc_i64, { "r", "r", "r" } },
+ { INDEX_op_orc_i64, { "r", "r", "r" } },
+ { INDEX_op_eqv_i64, { "r", "r", "r" } },
+ { INDEX_op_nand_i64, { "r", "r", "r" } },
+ { INDEX_op_nor_i64, { "r", "r", "r" } },
{ INDEX_op_shl_i64, { "r", "r", "ri" } },
{ INDEX_op_shr_i64, { "r", "r", "ri" } },
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 76001e8..6ea4541 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -83,11 +83,11 @@ typedef enum {
#define TCG_TARGET_HAS_bswap32_i32 1
#define TCG_TARGET_HAS_not_i32 1
#define TCG_TARGET_HAS_neg_i32 1
-#define TCG_TARGET_HAS_andc_i32 0
-#define TCG_TARGET_HAS_orc_i32 0
-#define TCG_TARGET_HAS_eqv_i32 0
-#define TCG_TARGET_HAS_nand_i32 0
-#define TCG_TARGET_HAS_nor_i32 0
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32 1
+#define TCG_TARGET_HAS_eqv_i32 1
+#define TCG_TARGET_HAS_nand_i32 1
+#define TCG_TARGET_HAS_nor_i32 1
#define TCG_TARGET_HAS_deposit_i32 0
#define TCG_TARGET_HAS_movcond_i32 0
#define TCG_TARGET_HAS_add2_i32 0
@@ -105,11 +105,11 @@ typedef enum {
#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_not_i64 1
#define TCG_TARGET_HAS_neg_i64 1
-#define TCG_TARGET_HAS_andc_i64 0
-#define TCG_TARGET_HAS_orc_i64 0
-#define TCG_TARGET_HAS_eqv_i64 0
-#define TCG_TARGET_HAS_nand_i64 0
-#define TCG_TARGET_HAS_nor_i64 0
+#define TCG_TARGET_HAS_andc_i64 1
+#define TCG_TARGET_HAS_orc_i64 1
+#define TCG_TARGET_HAS_eqv_i64 1
+#define TCG_TARGET_HAS_nand_i64 1
+#define TCG_TARGET_HAS_nor_i64 1
#define TCG_TARGET_HAS_deposit_i64 0
#define TCG_TARGET_HAS_movcond_i64 0
#define TCG_TARGET_HAS_add2_i64 0
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 22/33] tcg-ppc64: Handle constant inputs for some compound logicals
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (20 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 21/33] tcg-ppc64: Implement compound logicals Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 23/33] tcg-ppc64: Implement deposit Richard Henderson
` (11 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Since we have special code to handle and/or/xor with a constant,
apply the same to andc/orc/eqv with a constant.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 44 ++++++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 89832e5..7c326c5 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1394,17 +1394,19 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_and_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- tcg_out_andi32(s, args[0], args[1], args[2]);
+ tcg_out_andi32(s, a0, a1, a2);
} else {
- tcg_out32(s, AND | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, AND | SAB(a1, a0, a2));
}
break;
case INDEX_op_and_i64:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- tcg_out_andi64(s, args[0], args[1], args[2]);
+ tcg_out_andi64(s, a0, a1, a2);
} else {
- tcg_out32(s, AND | SAB(args[1], args[0], args[2]));
+ tcg_out32(s, AND | SAB(a1, a0, a2));
}
break;
case INDEX_op_or_i64:
@@ -1426,14 +1428,36 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
case INDEX_op_andc_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_andi32(s, a0, a1, ~a2);
+ } else {
+ tcg_out32(s, ANDC | SAB(a1, a0, a2));
+ }
+ break;
case INDEX_op_andc_i64:
- tcg_out32(s, ANDC | SAB(args[1], args[0], args[2]));
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out_andi64(s, a0, a1, ~a2);
+ } else {
+ tcg_out32(s, ANDC | SAB(a1, a0, a2));
+ }
break;
case INDEX_op_orc_i32:
+ if (const_args[2]) {
+ tcg_out_ori32(s, args[0], args[1], ~args[2]);
+ break;
+ }
+ /* FALLTHRU */
case INDEX_op_orc_i64:
tcg_out32(s, ORC | SAB(args[1], args[0], args[2]));
break;
case INDEX_op_eqv_i32:
+ if (const_args[2]) {
+ tcg_out_xori32(s, args[0], args[1], ~args[2]);
+ break;
+ }
+ /* FALLTHRU */
case INDEX_op_eqv_i64:
tcg_out32(s, EQV | SAB(args[1], args[0], args[2]));
break;
@@ -1820,9 +1844,9 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_and_i32, { "r", "r", "ri" } },
{ INDEX_op_or_i32, { "r", "r", "ri" } },
{ INDEX_op_xor_i32, { "r", "r", "ri" } },
- { INDEX_op_andc_i32, { "r", "r", "r" } },
- { INDEX_op_orc_i32, { "r", "r", "r" } },
- { INDEX_op_eqv_i32, { "r", "r", "r" } },
+ { INDEX_op_andc_i32, { "r", "r", "ri" } },
+ { INDEX_op_orc_i32, { "r", "r", "ri" } },
+ { INDEX_op_eqv_i32, { "r", "r", "ri" } },
{ INDEX_op_nand_i32, { "r", "r", "r" } },
{ INDEX_op_nor_i32, { "r", "r", "r" } },
@@ -1840,10 +1864,10 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_add_i64, { "r", "r", "rT" } },
{ INDEX_op_sub_i64, { "r", "rI", "rT" } },
- { INDEX_op_and_i64, { "r", "r", "rU" } },
+ { INDEX_op_and_i64, { "r", "r", "ri" } },
{ INDEX_op_or_i64, { "r", "r", "rU" } },
{ INDEX_op_xor_i64, { "r", "r", "rU" } },
- { INDEX_op_andc_i64, { "r", "r", "r" } },
+ { INDEX_op_andc_i64, { "r", "r", "ri" } },
{ INDEX_op_orc_i64, { "r", "r", "r" } },
{ INDEX_op_eqv_i64, { "r", "r", "r" } },
{ INDEX_op_nand_i64, { "r", "r", "r" } },
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 23/33] tcg-ppc64: Implement deposit
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (21 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 22/33] tcg-ppc64: Handle constant inputs for some " Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 24/33] tcg-ppc64: Use I constraint for mul Richard Henderson
` (10 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 12 ++++++++++++
tcg/ppc64/tcg-target.h | 4 ++--
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 7c326c5..909ba3c 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1796,6 +1796,15 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
+ case INDEX_op_deposit_i32:
+ tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
+ 32 - args[3] - args[4], 31 - args[3]);
+ break;
+ case INDEX_op_deposit_i64:
+ tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
+ 64 - args[3] - args[4]);
+ break;
+
default:
tcg_dump_ops (s);
tcg_abort ();
@@ -1917,6 +1926,9 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_bswap32_i64, { "r", "r" } },
{ INDEX_op_bswap64_i64, { "r", "r" } },
+ { INDEX_op_deposit_i32, { "r", "0", "r" } },
+ { INDEX_op_deposit_i64, { "r", "0", "r" } },
+
{ -1 },
};
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 6ea4541..7ffa895 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -88,7 +88,7 @@ typedef enum {
#define TCG_TARGET_HAS_eqv_i32 1
#define TCG_TARGET_HAS_nand_i32 1
#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_movcond_i32 0
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
@@ -110,7 +110,7 @@ typedef enum {
#define TCG_TARGET_HAS_eqv_i64 1
#define TCG_TARGET_HAS_nand_i64 1
#define TCG_TARGET_HAS_nor_i64 1
-#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
#define TCG_TARGET_HAS_movcond_i64 0
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 24/33] tcg-ppc64: Use I constraint for mul
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (22 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 23/33] tcg-ppc64: Implement deposit Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 25/33] tcg-ppc64: Cleanup i32 constants to tcg_out_cmp Richard Henderson
` (9 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
The mul_i32 pattern was loading non-16-bit constants into a register,
when we can get the middle-end to do that for us. The mul_i64 pattern
was not considering that MULLI takes 64-bit inputs.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 909ba3c..edb2b6c 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1471,17 +1471,12 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_mul_i32:
+ a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
- if (args[2] == (int16_t) args[2])
- tcg_out32 (s, MULLI | RT (args[0]) | RA (args[1])
- | (args[2] & 0xffff));
- else {
- tcg_out_movi (s, TCG_TYPE_I32, 0, args[2]);
- tcg_out32 (s, MULLW | TAB (args[0], args[1], 0));
- }
+ tcg_out32(s, MULLI | TAI(a0, a1, a2));
+ } else {
+ tcg_out32(s, MULLW | TAB(a0, a1, a2));
}
- else
- tcg_out32 (s, MULLW | TAB (args[0], args[1], args[2]));
break;
case INDEX_op_div_i32:
@@ -1646,7 +1641,12 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_mul_i64:
- tcg_out32 (s, MULLD | TAB (args[0], args[1], args[2]));
+ a0 = args[0], a1 = args[1], a2 = args[2];
+ if (const_args[2]) {
+ tcg_out32(s, MULLI | TAI(a0, a1, a2));
+ } else {
+ tcg_out32(s, MULLD | TAB(a0, a1, a2));
+ }
break;
case INDEX_op_div_i64:
tcg_out32 (s, DIVD | TAB (args[0], args[1], args[2]));
@@ -1844,7 +1844,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_ld32s_i64, { "r", "r" } },
{ INDEX_op_add_i32, { "r", "r", "ri" } },
- { INDEX_op_mul_i32, { "r", "r", "ri" } },
+ { INDEX_op_mul_i32, { "r", "r", "rI" } },
{ INDEX_op_div_i32, { "r", "r", "r" } },
{ INDEX_op_divu_i32, { "r", "r", "r" } },
{ INDEX_op_rem_i32, { "r", "r", "r" } },
@@ -1888,7 +1888,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_rotl_i64, { "r", "r", "ri" } },
{ INDEX_op_rotr_i64, { "r", "r", "ri" } },
- { INDEX_op_mul_i64, { "r", "r", "r" } },
+ { INDEX_op_mul_i64, { "r", "r", "rI" } },
{ INDEX_op_div_i64, { "r", "r", "r" } },
{ INDEX_op_divu_i64, { "r", "r", "r" } },
{ INDEX_op_rem_i64, { "r", "r", "r" } },
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 25/33] tcg-ppc64: Cleanup i32 constants to tcg_out_cmp
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (23 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 24/33] tcg-ppc64: Use I constraint for mul Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-15 8:01 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 26/33] tcg-ppc64: Use TCGType throughout compares Richard Henderson
` (8 subsequent siblings)
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Nothing else in the call chain ensures that these
constants don't have garbage in the high bits.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index edb2b6c..27d5ea5 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1059,6 +1059,11 @@ static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
int imm;
uint32_t op;
+ /* Simplify the comparisons below wrt CMPI. */
+ if (type == TCG_TYPE_I32) {
+ arg2 = (int32_t)arg2;
+ }
+
switch (cond) {
case TCG_COND_EQ:
case TCG_COND_NE:
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 25/33] tcg-ppc64: Cleanup i32 constants to tcg_out_cmp
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 25/33] tcg-ppc64: Cleanup i32 constants to tcg_out_cmp Richard Henderson
@ 2013-04-15 8:01 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-15 8:01 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:18PM -0500, Richard Henderson wrote:
> Nothing else in the call chain ensures that these
> constants don't have garbage in the high bits.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index edb2b6c..27d5ea5 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -1059,6 +1059,11 @@ static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
> int imm;
> uint32_t op;
>
> + /* Simplify the comparisons below wrt CMPI. */
> + if (type == TCG_TYPE_I32) {
> + arg2 = (int32_t)arg2;
> + }
> +
> switch (cond) {
> case TCG_COND_EQ:
> case TCG_COND_NE:
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 26/33] tcg-ppc64: Use TCGType throughout compares
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (24 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 25/33] tcg-ppc64: Cleanup i32 constants to tcg_out_cmp Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 27/33] tcg-ppc64: Use MFOCRF instead of MFCR Richard Henderson
` (7 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
The optimization/bug being fixed is that tcg_out_cmp was not applying the
right type when loading a constant into a register, in the case where the
comparison can't use an immediate directly. Rather than recomputing the TCGType enum from the arch64 bool,
pass around the original TCGType throughout.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 27d5ea5..806f3e2 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1053,8 +1053,8 @@ static void tcg_out_st (TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
tcg_out_ldsta (s, arg, arg1, arg2, STD, STDX);
}
-static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
- int const_arg2, int cr, int arch64)
+static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
+ int const_arg2, int cr, TCGType type)
{
int imm;
uint32_t op;
@@ -1116,19 +1116,17 @@ static void tcg_out_cmp (TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
default:
tcg_abort ();
}
- op |= BF (cr) | (arch64 << 21);
+ op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
- if (imm)
- tcg_out32 (s, op | RA (arg1) | (arg2 & 0xffff));
- else {
+ if (imm) {
+ tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
+ } else {
if (const_arg2) {
- tcg_out_movi (s, TCG_TYPE_I64, 0, arg2);
- tcg_out32 (s, op | RA (arg1) | RB (0));
+ tcg_out_movi(s, type, 0, arg2);
+ arg2 = 0;
}
- else
- tcg_out32 (s, op | RA (arg1) | RB (arg2));
+ tcg_out32(s, op | RA(arg1) | RB(arg2));
}
-
}
static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
@@ -1228,7 +1226,7 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
sh = 31;
crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_GT) | BB (7, CR_GT);
crtest:
- tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7, type == TCG_TYPE_I64);
+ tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7, type);
if (crop) tcg_out32 (s, crop);
tcg_out32 (s, MFCR | RT (0));
tcg_out_rlw(s, RLWINM, arg0, 0, sh, 31, 31);
@@ -1254,12 +1252,12 @@ static void tcg_out_bc (TCGContext *s, int bc, int label_index)
}
}
-static void tcg_out_brcond (TCGContext *s, TCGCond cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index, int arch64)
+static void tcg_out_brcond(TCGContext *s, TCGCond cond,
+ TCGArg arg1, TCGArg arg2, int const_arg2,
+ int label_index, TCGType type)
{
- tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7, arch64);
- tcg_out_bc (s, tcg_to_bc[cond], label_index);
+ tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+ tcg_out_bc(s, tcg_to_bc[cond], label_index);
}
void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr)
@@ -1543,11 +1541,13 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_brcond_i32:
- tcg_out_brcond (s, args[2], args[0], args[1], const_args[1], args[3], 0);
+ tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ args[3], TCG_TYPE_I32);
break;
case INDEX_op_brcond_i64:
- tcg_out_brcond (s, args[2], args[0], args[1], const_args[1], args[3], 1);
+ tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
+ args[3], TCG_TYPE_I64);
break;
case INDEX_op_neg_i32:
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 27/33] tcg-ppc64: Use MFOCRF instead of MFCR
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (25 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 26/33] tcg-ppc64: Use TCGType throughout compares Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-15 8:02 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 28/33] tcg-ppc64: Use ISEL for setcond Richard Henderson
` (6 subsequent siblings)
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
It takes half the cycles to read one CR field instead of all 8.
This is a backward compatible addition to the ISA, so chips prior
to Power 2.00 spec will simply continue to read the entire CR register.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 806f3e2..f0ed698 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -382,6 +382,7 @@ static int tcg_target_const_match (tcg_target_long val,
#define SRAWI XO31(824)
#define NEG XO31(104)
#define MFCR XO31( 19)
+#define MFOCRF (MFCR | (1u << 20))
#define NOR XO31(124)
#define CNTLZW XO31( 26)
#define CNTLZD XO31( 58)
@@ -430,6 +431,7 @@ static int tcg_target_const_match (tcg_target_long val,
#define ME(e) ((e)<<1)
#define BO(o) ((o)<<21)
#define MB64(b) ((b)<<5)
+#define FXM(b) (1 << (19 - (b)))
#define LK 1
@@ -1226,10 +1228,12 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
sh = 31;
crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_GT) | BB (7, CR_GT);
crtest:
- tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7, type);
- if (crop) tcg_out32 (s, crop);
- tcg_out32 (s, MFCR | RT (0));
- tcg_out_rlw(s, RLWINM, arg0, 0, sh, 31, 31);
+ tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+ if (crop) {
+ tcg_out32(s, crop);
+ }
+ tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
+ tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
break;
default:
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 27/33] tcg-ppc64: Use MFOCRF instead of MFCR
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 27/33] tcg-ppc64: Use MFOCRF instead of MFCR Richard Henderson
@ 2013-04-15 8:02 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-15 8:02 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:20PM -0500, Richard Henderson wrote:
> It takes half the cycles to read one CR register instead of all 8.
> This is a backward compatible addition to the ISA, so chips prior
> to Power 2.00 spec will simply continue to read the entire CR register.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 12 ++++++++----
> 1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index 806f3e2..f0ed698 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -382,6 +382,7 @@ static int tcg_target_const_match (tcg_target_long val,
> #define SRAWI XO31(824)
> #define NEG XO31(104)
> #define MFCR XO31( 19)
> +#define MFOCRF (MFCR | (1u << 20))
> #define NOR XO31(124)
> #define CNTLZW XO31( 26)
> #define CNTLZD XO31( 58)
> @@ -430,6 +431,7 @@ static int tcg_target_const_match (tcg_target_long val,
> #define ME(e) ((e)<<1)
> #define BO(o) ((o)<<21)
> #define MB64(b) ((b)<<5)
> +#define FXM(b) (1 << (19 - (b)))
>
> #define LK 1
>
> @@ -1226,10 +1228,12 @@ static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
> sh = 31;
> crop = CRNOR | BT (7, CR_EQ) | BA (7, CR_GT) | BB (7, CR_GT);
> crtest:
> - tcg_out_cmp (s, cond, arg1, arg2, const_arg2, 7, type);
> - if (crop) tcg_out32 (s, crop);
> - tcg_out32 (s, MFCR | RT (0));
> - tcg_out_rlw(s, RLWINM, arg0, 0, sh, 31, 31);
> + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
> + if (crop) {
> + tcg_out32(s, crop);
> + }
> + tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
> + tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
> break;
>
> default:
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 28/33] tcg-ppc64: Use ISEL for setcond
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (26 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 27/33] tcg-ppc64: Use MFOCRF instead of MFCR Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-15 8:13 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 29/33] tcg-ppc64: Implement movcond Richard Henderson
` (5 subsequent siblings)
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
There are a few simple special cases that should be handled first.
Break these out to subroutines to avoid code duplication.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 181 ++++++++++++++++++++++++++++++++-----------------
1 file changed, 119 insertions(+), 62 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index f0ed698..27a7ff2 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -45,6 +45,7 @@ static uint8_t *tb_ret_addr;
#endif
#define HAVE_ISA_2_06 0
+#define HAVE_ISEL 0
#ifdef CONFIG_USE_GUEST_BASE
#define TCG_GUEST_BASE_REG 30
@@ -390,6 +391,7 @@ static int tcg_target_const_match (tcg_target_long val,
#define ORC XO31(412)
#define EQV XO31(284)
#define NAND XO31(476)
+#define ISEL XO31( 15)
#define MULLD XO31(233)
#define MULHD XO31( 73)
@@ -445,6 +447,7 @@ static int tcg_target_const_match (tcg_target_long val,
#define BT(n, c) (((c)+((n)*4))<<21)
#define BA(n, c) (((c)+((n)*4))<<16)
#define BB(n, c) (((c)+((n)*4))<<11)
+#define BC_(n, c) (((c)+((n)*4))<<6)
#define BO_COND_TRUE BO (12)
#define BO_COND_FALSE BO ( 4)
@@ -470,6 +473,20 @@ static const uint32_t tcg_to_bc[] = {
[TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE,
};
+/* The low bit here is set if the RA and RB fields must be inverted. */
+static const uint32_t tcg_to_isel[] = {
+ [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ),
+ [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1,
+ [TCG_COND_LT] = ISEL | BC_(7, CR_LT),
+ [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1,
+ [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1,
+ [TCG_COND_GT] = ISEL | BC_(7, CR_GT),
+ [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
+ [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
+ [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
+ [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
+};
+
static inline void tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
@@ -1131,79 +1148,119 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
}
}
-static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
- TCGArg arg0, TCGArg arg1, TCGArg arg2,
- int const_arg2)
+static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
+ TCGReg dst, TCGReg src)
{
- int crop, sh, arg;
+ tcg_out32(s, (type == TCG_TYPE_I64 ? CNTLZD : CNTLZW) | RS(src) | RA(dst));
+ tcg_out_shri64(s, dst, dst, type == TCG_TYPE_I64 ? 6 : 5);
+}
- switch (cond) {
- case TCG_COND_EQ:
- if (const_arg2) {
- if (!arg2) {
- arg = arg1;
- }
- else {
- arg = 0;
- if ((uint16_t) arg2 == arg2) {
- tcg_out32(s, XORI | SAI(arg1, 0, arg2));
- }
- else {
- tcg_out_movi (s, type, 0, arg2);
- tcg_out32 (s, XOR | SAB (arg1, 0, 0));
- }
- }
- }
- else {
- arg = 0;
- tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
- }
+static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
+{
+ /* X != 0 implies X + -1 generates a carry. Extra addition
+ trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. */
+ if (dst != src) {
+ tcg_out32(s, ADDIC | TAI(dst, src, -1));
+ tcg_out32(s, SUBFE | TAB(dst, dst, src));
+ } else {
+ tcg_out32(s, ADDIC | TAI(0, src, -1));
+ tcg_out32(s, SUBFE | TAB(dst, 0, src));
+ }
+}
- if (type == TCG_TYPE_I64) {
- tcg_out32 (s, CNTLZD | RS (arg) | RA (0));
- tcg_out_rld (s, RLDICL, arg0, 0, 58, 6);
- }
- else {
- tcg_out32 (s, CNTLZW | RS (arg) | RA (0));
- tcg_out_rlw(s, RLWINM, arg0, 0, 27, 5, 31);
+static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
+ bool const_arg2)
+{
+ if (const_arg2) {
+ if ((uint32_t)arg2 == arg2) {
+ tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
+ tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
}
- break;
+ } else {
+ tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
+ }
+ return TCG_REG_R0;
+}
- case TCG_COND_NE:
- if (const_arg2) {
- if (!arg2) {
- arg = arg1;
- }
- else {
- arg = 0;
- if ((uint16_t) arg2 == arg2) {
- tcg_out32(s, XORI | SAI(arg1, 0, arg2));
- } else {
- tcg_out_movi (s, type, 0, arg2);
- tcg_out32 (s, XOR | SAB (arg1, 0, 0));
- }
+static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
+ TCGArg arg0, TCGArg arg1, TCGArg arg2,
+ int const_arg2)
+{
+ int crop, sh;
+
+ /* Ignore high bits of a potential constant arg2. */
+ if (type == TCG_TYPE_I32) {
+ arg2 = (uint32_t)arg2;
+ }
+
+ /* Handle common and trivial cases before handling anything else. */
+ if (arg2 == 0) {
+ switch (cond) {
+ case TCG_COND_EQ:
+ tcg_out_setcond_eq0(s, type, arg0, arg1);
+ return;
+ case TCG_COND_NE:
+ if (type == TCG_TYPE_I32) {
+ tcg_out_ext32u(s, TCG_REG_R0, arg1);
+ arg1 = TCG_REG_R0;
}
+ tcg_out_setcond_ne0(s, arg0, arg1);
+ return;
+ case TCG_COND_GE:
+ tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
+ arg1 = arg0;
+ /* FALLTHRU */
+ case TCG_COND_LT:
+ /* Extract the sign bit. */
+ tcg_out_rld(s, RLDICL, arg0, arg1,
+ type == TCG_TYPE_I64 ? 1 : 33, 63);
+ return;
+ default:
+ break;
}
- else {
- arg = 0;
- tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
- }
+ }
- /* Make sure and discard the high 32-bits of the input. */
- if (type == TCG_TYPE_I32) {
- tcg_out32(s, EXTSW | RA(TCG_REG_R0) | RS(arg));
- arg = TCG_REG_R0;
- }
+ /* If we have ISEL, we can implement everything with 3 or 4 insns.
+ All other cases below are also at least 3 insns, so speed up the
+ code generator by not considering them and always using ISEL. */
+ if (HAVE_ISEL) {
+ int isel, tab;
+
+ tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+
+ isel = tcg_to_isel[cond];
- if (arg == arg1 && arg1 == arg0) {
- tcg_out32(s, ADDIC | TAI(0, arg, -1));
- tcg_out32(s, SUBFE | TAB(arg0, 0, arg));
+ tcg_out_movi(s, type, arg0, 1);
+ if (isel & 1) {
+ /* arg0 = (bc ? 0 : 1) */
+ tab = TAB(arg0, 0, arg0);
+ isel &= ~1;
+ } else {
+ /* arg0 = (bc ? 1 : 0) */
+ tcg_out_movi(s, type, TCG_REG_R0, 0);
+ tab = TAB(arg0, arg0, TCG_REG_R0);
}
- else {
- tcg_out32(s, ADDIC | TAI(arg0, arg, -1));
- tcg_out32(s, SUBFE | TAB(arg0, arg0, arg));
+ tcg_out32(s, isel | tab);
+ return;
+ }
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
+ tcg_out_setcond_eq0(s, type, arg0, arg1);
+ return;
+
+ case TCG_COND_NE:
+ arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
+ /* Discard the high bits only once, rather than both inputs. */
+ if (type == TCG_TYPE_I32) {
+ tcg_out_ext32u(s, TCG_REG_R0, arg1);
+ arg1 = TCG_REG_R0;
}
- break;
+ tcg_out_setcond_ne0(s, arg0, arg1);
+ return;
case TCG_COND_GT:
case TCG_COND_GTU:
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 28/33] tcg-ppc64: Use ISEL for setcond
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 28/33] tcg-ppc64: Use ISEL for setcond Richard Henderson
@ 2013-04-15 8:13 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-15 8:13 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:21PM -0500, Richard Henderson wrote:
> There are a few simple special cases that should be handled first.
> Break these out to subroutines to avoid code duplication.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 181 ++++++++++++++++++++++++++++++++-----------------
> 1 file changed, 119 insertions(+), 62 deletions(-)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index f0ed698..27a7ff2 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -45,6 +45,7 @@ static uint8_t *tb_ret_addr;
> #endif
>
> #define HAVE_ISA_2_06 0
> +#define HAVE_ISEL 0
>
> #ifdef CONFIG_USE_GUEST_BASE
> #define TCG_GUEST_BASE_REG 30
> @@ -390,6 +391,7 @@ static int tcg_target_const_match (tcg_target_long val,
> #define ORC XO31(412)
> #define EQV XO31(284)
> #define NAND XO31(476)
> +#define ISEL XO31( 15)
>
> #define MULLD XO31(233)
> #define MULHD XO31( 73)
> @@ -445,6 +447,7 @@ static int tcg_target_const_match (tcg_target_long val,
> #define BT(n, c) (((c)+((n)*4))<<21)
> #define BA(n, c) (((c)+((n)*4))<<16)
> #define BB(n, c) (((c)+((n)*4))<<11)
> +#define BC_(n, c) (((c)+((n)*4))<<6)
>
> #define BO_COND_TRUE BO (12)
> #define BO_COND_FALSE BO ( 4)
> @@ -470,6 +473,20 @@ static const uint32_t tcg_to_bc[] = {
> [TCG_COND_GTU] = BC | BI (7, CR_GT) | BO_COND_TRUE,
> };
>
> +/* The low bit here is set if the RA and RB fields must be inverted. */
> +static const uint32_t tcg_to_isel[] = {
> + [TCG_COND_EQ] = ISEL | BC_(7, CR_EQ),
> + [TCG_COND_NE] = ISEL | BC_(7, CR_EQ) | 1,
> + [TCG_COND_LT] = ISEL | BC_(7, CR_LT),
> + [TCG_COND_GE] = ISEL | BC_(7, CR_LT) | 1,
> + [TCG_COND_LE] = ISEL | BC_(7, CR_GT) | 1,
> + [TCG_COND_GT] = ISEL | BC_(7, CR_GT),
> + [TCG_COND_LTU] = ISEL | BC_(7, CR_LT),
> + [TCG_COND_GEU] = ISEL | BC_(7, CR_LT) | 1,
> + [TCG_COND_LEU] = ISEL | BC_(7, CR_GT) | 1,
> + [TCG_COND_GTU] = ISEL | BC_(7, CR_GT),
> +};
> +
> static inline void tcg_out_mov(TCGContext *s, TCGType type,
> TCGReg ret, TCGReg arg)
> {
> @@ -1131,79 +1148,119 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
> }
> }
>
> -static void tcg_out_setcond (TCGContext *s, TCGType type, TCGCond cond,
> - TCGArg arg0, TCGArg arg1, TCGArg arg2,
> - int const_arg2)
> +static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
> + TCGReg dst, TCGReg src)
> {
> - int crop, sh, arg;
> + tcg_out32(s, (type == TCG_TYPE_I64 ? CNTLZD : CNTLZW) | RS(src) | RA(dst));
> + tcg_out_shri64(s, dst, dst, type == TCG_TYPE_I64 ? 6 : 5);
> +}
>
> - switch (cond) {
> - case TCG_COND_EQ:
> - if (const_arg2) {
> - if (!arg2) {
> - arg = arg1;
> - }
> - else {
> - arg = 0;
> - if ((uint16_t) arg2 == arg2) {
> - tcg_out32(s, XORI | SAI(arg1, 0, arg2));
> - }
> - else {
> - tcg_out_movi (s, type, 0, arg2);
> - tcg_out32 (s, XOR | SAB (arg1, 0, 0));
> - }
> - }
> - }
> - else {
> - arg = 0;
> - tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
> - }
> +static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
> +{
> + /* X != 0 implies X + -1 generates a carry. Extra addition
> + trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. */
> + if (dst != src) {
> + tcg_out32(s, ADDIC | TAI(dst, src, -1));
> + tcg_out32(s, SUBFE | TAB(dst, dst, src));
> + } else {
> + tcg_out32(s, ADDIC | TAI(0, src, -1));
> + tcg_out32(s, SUBFE | TAB(dst, 0, src));
> + }
> +}
>
> - if (type == TCG_TYPE_I64) {
> - tcg_out32 (s, CNTLZD | RS (arg) | RA (0));
> - tcg_out_rld (s, RLDICL, arg0, 0, 58, 6);
> - }
> - else {
> - tcg_out32 (s, CNTLZW | RS (arg) | RA (0));
> - tcg_out_rlw(s, RLWINM, arg0, 0, 27, 5, 31);
> +static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
> + bool const_arg2)
> +{
> + if (const_arg2) {
> + if ((uint32_t)arg2 == arg2) {
> + tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
> + } else {
> + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
> + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
> }
> - break;
> + } else {
> + tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
> + }
> + return TCG_REG_R0;
> +}
>
> - case TCG_COND_NE:
> - if (const_arg2) {
> - if (!arg2) {
> - arg = arg1;
> - }
> - else {
> - arg = 0;
> - if ((uint16_t) arg2 == arg2) {
> - tcg_out32(s, XORI | SAI(arg1, 0, arg2));
> - } else {
> - tcg_out_movi (s, type, 0, arg2);
> - tcg_out32 (s, XOR | SAB (arg1, 0, 0));
> - }
> +static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
> + TCGArg arg0, TCGArg arg1, TCGArg arg2,
> + int const_arg2)
> +{
> + int crop, sh;
> +
> + /* Ignore high bits of a potential constant arg2. */
> + if (type == TCG_TYPE_I32) {
> + arg2 = (uint32_t)arg2;
> + }
> +
> + /* Handle common and trivial cases before handling anything else. */
> + if (arg2 == 0) {
> + switch (cond) {
> + case TCG_COND_EQ:
> + tcg_out_setcond_eq0(s, type, arg0, arg1);
> + return;
> + case TCG_COND_NE:
> + if (type == TCG_TYPE_I32) {
> + tcg_out_ext32u(s, TCG_REG_R0, arg1);
> + arg1 = TCG_REG_R0;
> }
> + tcg_out_setcond_ne0(s, arg0, arg1);
> + return;
> + case TCG_COND_GE:
> + tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
> + arg1 = arg0;
> + /* FALLTHRU */
> + case TCG_COND_LT:
> + /* Extract the sign bit. */
> + tcg_out_rld(s, RLDICL, arg0, arg1,
> + type == TCG_TYPE_I64 ? 1 : 33, 63);
> + return;
> + default:
> + break;
> }
> - else {
> - arg = 0;
> - tcg_out32 (s, XOR | SAB (arg1, 0, arg2));
> - }
> + }
>
> - /* Make sure and discard the high 32-bits of the input. */
> - if (type == TCG_TYPE_I32) {
> - tcg_out32(s, EXTSW | RA(TCG_REG_R0) | RS(arg));
> - arg = TCG_REG_R0;
> - }
> + /* If we have ISEL, we can implement everything with 3 or 4 insns.
> + All other cases below are also at least 3 insns, so speed up the
> + code generator by not considering them and always using ISEL. */
> + if (HAVE_ISEL) {
> + int isel, tab;
> +
> + tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
> +
> + isel = tcg_to_isel[cond];
>
> - if (arg == arg1 && arg1 == arg0) {
> - tcg_out32(s, ADDIC | TAI(0, arg, -1));
> - tcg_out32(s, SUBFE | TAB(arg0, 0, arg));
> + tcg_out_movi(s, type, arg0, 1);
> + if (isel & 1) {
> + /* arg0 = (bc ? 0 : 1) */
> + tab = TAB(arg0, 0, arg0);
> + isel &= ~1;
> + } else {
> + /* arg0 = (bc ? 1 : 0) */
> + tcg_out_movi(s, type, TCG_REG_R0, 0);
> + tab = TAB(arg0, arg0, TCG_REG_R0);
> }
> - else {
> - tcg_out32(s, ADDIC | TAI(arg0, arg, -1));
> - tcg_out32(s, SUBFE | TAB(arg0, arg0, arg));
> + tcg_out32(s, isel | tab);
> + return;
> + }
> +
> + switch (cond) {
> + case TCG_COND_EQ:
> + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
> + tcg_out_setcond_eq0(s, type, arg0, arg1);
> + return;
> +
> + case TCG_COND_NE:
> + arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
> + /* Discard the high bits only once, rather than both inputs. */
> + if (type == TCG_TYPE_I32) {
> + tcg_out_ext32u(s, TCG_REG_R0, arg1);
> + arg1 = TCG_REG_R0;
> }
> - break;
> + tcg_out_setcond_ne0(s, arg0, arg1);
> + return;
>
> case TCG_COND_GT:
> case TCG_COND_GTU:
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 29/33] tcg-ppc64: Implement movcond
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (27 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 28/33] tcg-ppc64: Use ISEL for setcond Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 30/33] tcg-ppc64: Use getauxval for ISA detection Richard Henderson
` (4 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
tcg/ppc64/tcg-target.h | 4 ++--
2 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 27a7ff2..faf110f 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1321,6 +1321,54 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond,
tcg_out_bc(s, tcg_to_bc[cond], label_index);
}
+static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
+ TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
+ TCGArg v2, bool const_c2)
+{
+ /* If for some reason both inputs are zero, don't produce bad code. */
+ if (v1 == 0 && v2 == 0) {
+ tcg_out_movi(s, type, dest, 0);
+ return;
+ }
+
+ tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
+
+ if (HAVE_ISEL) {
+ int isel = tcg_to_isel[cond];
+
+ /* Swap the V operands if the operation indicates inversion. */
+ if (isel & 1) {
+ int t = v1;
+ v1 = v2;
+ v2 = t;
+ isel &= ~1;
+ }
+ /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand. */
+ if (v2 == 0) {
+ tcg_out_movi(s, type, 0, 0);
+ }
+ tcg_out32(s, isel | TAB(dest, v1, v2));
+ } else {
+ if (dest == v2) {
+ cond = tcg_invert_cond(cond);
+ v2 = v1;
+ } else if (dest != v1) {
+ if (v1 == 0) {
+ tcg_out_movi(s, type, dest, 0);
+ } else {
+ tcg_out_mov(s, type, dest, v1);
+ }
+ }
+ /* Branch forward over one insn */
+ tcg_out32(s, tcg_to_bc[cond] | 8);
+ if (v2 == 0) {
+ tcg_out_movi(s, type, dest, 0);
+ } else {
+ tcg_out_mov(s, type, dest, v2);
+ }
+ }
+}
+
void ppc_tb_set_jmp_target (unsigned long jmp_addr, unsigned long addr)
{
TCGContext s;
@@ -1871,6 +1919,15 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
64 - args[3] - args[4]);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1], args[2],
+ args[3], args[4], const_args[2]);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1], args[2],
+ args[3], args[4], const_args[2]);
+ break;
+
default:
tcg_dump_ops (s);
tcg_abort ();
@@ -1985,6 +2042,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_setcond_i32, { "r", "r", "ri" } },
{ INDEX_op_setcond_i64, { "r", "r", "ri" } },
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "rZ", "rZ" } },
+ { INDEX_op_movcond_i64, { "r", "r", "ri", "rZ", "rZ" } },
{ INDEX_op_bswap16_i32, { "r", "r" } },
{ INDEX_op_bswap16_i64, { "r", "r" } },
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 7ffa895..67313b9 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -89,7 +89,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 1
#define TCG_TARGET_HAS_nor_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
-#define TCG_TARGET_HAS_movcond_i32 0
+#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_TARGET_HAS_add2_i32 0
#define TCG_TARGET_HAS_sub2_i32 0
#define TCG_TARGET_HAS_mulu2_i32 0
@@ -111,7 +111,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i64 1
#define TCG_TARGET_HAS_nor_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
-#define TCG_TARGET_HAS_movcond_i64 0
+#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 30/33] tcg-ppc64: Use getauxval for ISA detection
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (28 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 29/33] tcg-ppc64: Implement movcond Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-05 16:59 ` Richard Henderson
2013-04-15 8:13 ` Aurelien Jarno
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 31/33] tcg-ppc64: Implement add2/sub2_i64 Richard Henderson
` (3 subsequent siblings)
33 siblings, 2 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Glibc 2.16 includes an easy way to get feature bits previously
buried in /proc or the program startup auxiliary vector. Use it.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
configure | 18 ++++++++++++++++++
tcg/ppc64/tcg-target.c | 16 +++++++++++++++-
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/configure b/configure
index d685275..40c3538 100755
--- a/configure
+++ b/configure
@@ -3252,6 +3252,20 @@ if compile_prog "" "" ; then
int128=yes
fi
+########################################
+# check if getauxval is available.
+
+getauxval=no
+cat > $TMPC << EOF
+#include <sys/auxv.h>
+int main(void) {
+ return getauxval(AT_HWCAP) == 0;
+}
+EOF
+if compile_prog "" "" ; then
+ getauxval=yes
+fi
+
##########################################
# End of CC checks
# After here, no more $cc or $ld runs
@@ -3809,6 +3823,10 @@ if test "$int128" = "yes" ; then
echo "CONFIG_INT128=y" >> $config_host_mak
fi
+if test "$getauxval" = "yes" ; then
+ echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
+fi
+
if test "$glusterfs" = "yes" ; then
echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
fi
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index faf110f..6dec600 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -44,8 +44,15 @@ static uint8_t *tb_ret_addr;
#define GUEST_BASE 0
#endif
+#ifdef CONFIG_GETAUXVAL
+#include <sys/auxv.h>
+static bool have_isa_2_06;
+#define HAVE_ISA_2_06 have_isa_2_06
+#define HAVE_ISEL have_isa_2_06
+#else
#define HAVE_ISA_2_06 0
-#define HAVE_ISEL 0
+#define HAVE_ISA_ISEL 0
+#endif
#ifdef CONFIG_USE_GUEST_BASE
#define TCG_GUEST_BASE_REG 30
@@ -2059,6 +2066,13 @@ static const TCGTargetOpDef ppc_op_defs[] = {
static void tcg_target_init (TCGContext *s)
{
+#ifdef CONFIG_GETAUXVAL
+ unsigned long hwcap = getauxval(AT_HWCAP);
+ if (hwcap & PPC_FEATURE_ARCH_2_06) {
+ have_isa_2_06 = true;
+ }
+#endif
+
tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
tcg_regset_set32 (tcg_target_call_clobber_regs, 0,
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 30/33] tcg-ppc64: Use getauxval for ISA detection
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 30/33] tcg-ppc64: Use getauxval for ISA detection Richard Henderson
@ 2013-04-05 16:59 ` Richard Henderson
2013-04-15 8:13 ` Aurelien Jarno
1 sibling, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-05 16:59 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
On 2013-04-04 17:56, Richard Henderson wrote:
> +#ifdef CONFIG_GETAUXVAL
> +#include <sys/auxv.h>
> +static bool have_isa_2_06;
> +#define HAVE_ISA_2_06 have_isa_2_06
> +#define HAVE_ISEL have_isa_2_06
> +#else
> #define HAVE_ISA_2_06 0
> -#define HAVE_ISEL 0
> +#define HAVE_ISA_ISEL 0
Guh. I've pushed a new tree with this typo fixed. I won't repost the
patch series just for this though.
r~
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 30/33] tcg-ppc64: Use getauxval for ISA detection
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 30/33] tcg-ppc64: Use getauxval for ISA detection Richard Henderson
2013-04-05 16:59 ` Richard Henderson
@ 2013-04-15 8:13 ` Aurelien Jarno
1 sibling, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-15 8:13 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:23PM -0500, Richard Henderson wrote:
> Glibc 2.16 includes an easy way to get feature bits previously
> buried in /proc or the program startup auxiliary vector. Use it.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> configure | 18 ++++++++++++++++++
> tcg/ppc64/tcg-target.c | 16 +++++++++++++++-
> 2 files changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/configure b/configure
> index d685275..40c3538 100755
> --- a/configure
> +++ b/configure
> @@ -3252,6 +3252,20 @@ if compile_prog "" "" ; then
> int128=yes
> fi
>
> +########################################
> +# check if getauxval is available.
> +
> +getauxval=no
> +cat > $TMPC << EOF
> +#include <sys/auxv.h>
> +int main(void) {
> + return getauxval(AT_HWCAP) == 0;
> +}
> +EOF
> +if compile_prog "" "" ; then
> + getauxval=yes
> +fi
> +
> ##########################################
> # End of CC checks
> # After here, no more $cc or $ld runs
> @@ -3809,6 +3823,10 @@ if test "$int128" = "yes" ; then
> echo "CONFIG_INT128=y" >> $config_host_mak
> fi
>
> +if test "$getauxval" = "yes" ; then
> + echo "CONFIG_GETAUXVAL=y" >> $config_host_mak
> +fi
> +
> if test "$glusterfs" = "yes" ; then
> echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
> fi
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index faf110f..6dec600 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -44,8 +44,15 @@ static uint8_t *tb_ret_addr;
> #define GUEST_BASE 0
> #endif
>
> +#ifdef CONFIG_GETAUXVAL
> +#include <sys/auxv.h>
> +static bool have_isa_2_06;
> +#define HAVE_ISA_2_06 have_isa_2_06
> +#define HAVE_ISEL have_isa_2_06
> +#else
> #define HAVE_ISA_2_06 0
> -#define HAVE_ISEL 0
> +#define HAVE_ISA_ISEL 0
> +#endif
>
> #ifdef CONFIG_USE_GUEST_BASE
> #define TCG_GUEST_BASE_REG 30
> @@ -2059,6 +2066,13 @@ static const TCGTargetOpDef ppc_op_defs[] = {
>
> static void tcg_target_init (TCGContext *s)
> {
> +#ifdef CONFIG_GETAUXVAL
> + unsigned long hwcap = getauxval(AT_HWCAP);
> + if (hwcap & PPC_FEATURE_ARCH_2_06) {
> + have_isa_2_06 = true;
> + }
> +#endif
> +
> tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
> tcg_regset_set32 (tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
> tcg_regset_set32 (tcg_target_call_clobber_regs, 0,
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 31/33] tcg-ppc64: Implement add2/sub2_i64
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (29 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 30/33] tcg-ppc64: Use getauxval for ISA detection Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 32/33] tcg-ppc64: Implement mulu2/muls2_i64 Richard Henderson
` (2 subsequent siblings)
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++
tcg/ppc64/tcg-target.h | 4 ++--
2 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 6dec600..b134029 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -27,6 +27,7 @@
#define TCG_CT_CONST_S32 0x400
#define TCG_CT_CONST_U32 0x800
#define TCG_CT_CONST_ZERO 0x1000
+#define TCG_CT_CONST_MONE 0x2000
static uint8_t *tb_ret_addr;
@@ -262,6 +263,9 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
case 'J':
ct->ct |= TCG_CT_CONST_U16;
break;
+ case 'M':
+ ct->ct |= TCG_CT_CONST_MONE;
+ break;
case 'T':
ct->ct |= TCG_CT_CONST_S32;
break;
@@ -296,6 +300,8 @@ static int tcg_target_const_match (tcg_target_long val,
return 1;
} else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
return 1;
+ } else if ((ct & TCG_CT_CONST_MONE) && val == -1) {
+ return 1;
}
return 0;
}
@@ -366,11 +372,15 @@ static int tcg_target_const_match (tcg_target_long val,
#define EXTSW XO31(986)
#define ADD XO31(266)
#define ADDE XO31(138)
+#define ADDME XO31(234)
+#define ADDZE XO31(202)
#define ADDC XO31( 10)
#define AND XO31( 28)
#define SUBF XO31( 40)
#define SUBFC XO31( 8)
#define SUBFE XO31(136)
+#define SUBFME XO31(232)
+#define SUBFZE XO31(200)
#define OR XO31(444)
#define XOR XO31(316)
#define MULLW XO31(235)
@@ -1935,6 +1945,49 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
args[3], args[4], const_args[2]);
break;
+ case INDEX_op_add2_i64:
+ /* Note that the CA bit is defined based on the word size of the
+ environment. So in 64-bit mode it's always carry-out of bit 63.
+ The fallback code using deposit works just as well for 32-bit. */
+ a0 = args[0], a1 = args[1];
+ if (a0 == args[4] || (!const_args[5] && a0 == args[5])) {
+ a0 = TCG_REG_R0;
+ }
+ if (const_args[3]) {
+ tcg_out32(s, ADDIC | TAI(a0, args[2], args[3]));
+ } else {
+ tcg_out32(s, ADDC | TAB(a0, args[2], args[3]));
+ }
+ if (const_args[5]) {
+ tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[4]));
+ } else {
+ tcg_out32(s, ADDE | TAB(a1, args[4], args[5]));
+ }
+ if (a0 != args[0]) {
+ tcg_out_mov(s, TCG_TYPE_I64, args[0], a0);
+ }
+ break;
+
+ case INDEX_op_sub2_i64:
+ a0 = args[0], a1 = args[1];
+ if (a0 == args[5] || (!const_args[4] && a0 == args[4])) {
+ a0 = TCG_REG_R0;
+ }
+ if (const_args[2]) {
+ tcg_out32(s, SUBFIC | TAI(a0, args[3], args[2]));
+ } else {
+ tcg_out32(s, SUBFC | TAB(a0, args[3], args[2]));
+ }
+ if (const_args[4]) {
+ tcg_out32(s, (args[4] ? SUBFME : SUBFZE) | RT(a1) | RA(args[5]));
+ } else {
+ tcg_out32(s, SUBFE | TAB(a1, args[5], args[4]));
+ }
+ if (a0 != args[0]) {
+ tcg_out_mov(s, TCG_TYPE_I64, args[0], a0);
+ }
+ break;
+
default:
tcg_dump_ops (s);
tcg_abort ();
@@ -2061,6 +2114,9 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_deposit_i32, { "r", "0", "r" } },
{ INDEX_op_deposit_i64, { "r", "0", "r" } },
+ { INDEX_op_add2_i64, { "r", "r", "r", "rI", "r", "rZM" } },
+ { INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } },
+
{ -1 },
};
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 67313b9..8dff1d5 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -112,8 +112,8 @@ typedef enum {
#define TCG_TARGET_HAS_nor_i64 1
#define TCG_TARGET_HAS_deposit_i64 1
#define TCG_TARGET_HAS_movcond_i64 1
-#define TCG_TARGET_HAS_add2_i64 0
-#define TCG_TARGET_HAS_sub2_i64 0
+#define TCG_TARGET_HAS_add2_i64 1
+#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 0
#define TCG_TARGET_HAS_muls2_i64 0
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 32/33] tcg-ppc64: Implement mulu2/muls2_i64
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (30 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 31/33] tcg-ppc64: Implement add2/sub2_i64 Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 33/33] tcg-ppc64: Handle deposit of zero Richard Henderson
2013-04-13 12:24 ` [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Aurelien Jarno
33 siblings, 0 replies; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 27 +++++++++++++++++++++++++++
tcg/ppc64/tcg-target.h | 4 ++--
2 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index b134029..9583cf9 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1988,6 +1988,31 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
+ case INDEX_op_mulu2_i64:
+ case INDEX_op_muls2_i64:
+ {
+ int oph = (opc == INDEX_op_mulu2_i64 ? MULHDU : MULHD);
+ TCGReg outl = args[0], outh = args[1];
+ a0 = args[2], a1 = args[3];
+
+ if (outl == a0 || outl == a1) {
+ if (outh == a0 || outh == a1) {
+ outl = TCG_REG_R0;
+ } else {
+ tcg_out32(s, oph | TAB(outh, a0, a1));
+ oph = 0;
+ }
+ }
+ tcg_out32(s, MULLD | TAB(outl, a0, a1));
+ if (oph != 0) {
+ tcg_out32(s, oph | TAB(outh, a0, a1));
+ }
+ if (outl != args[0]) {
+ tcg_out_mov(s, TCG_TYPE_I64, args[0], outl);
+ }
+ }
+ break;
+
default:
tcg_dump_ops (s);
tcg_abort ();
@@ -2116,6 +2141,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_add2_i64, { "r", "r", "r", "rI", "r", "rZM" } },
{ INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } },
+ { INDEX_op_muls2_i64, { "r", "r", "r", "r" } },
+ { INDEX_op_mulu2_i64, { "r", "r", "r", "r" } },
{ -1 },
};
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 8dff1d5..cb77634 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -114,8 +114,8 @@ typedef enum {
#define TCG_TARGET_HAS_movcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
-#define TCG_TARGET_HAS_mulu2_i64 0
-#define TCG_TARGET_HAS_muls2_i64 0
+#define TCG_TARGET_HAS_mulu2_i64 1
+#define TCG_TARGET_HAS_muls2_i64 1
#define TCG_AREG0 TCG_REG_R27
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* [Qemu-devel] [PATCH v4 33/33] tcg-ppc64: Handle deposit of zero
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (31 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 32/33] tcg-ppc64: Implement mulu2/muls2_i64 Richard Henderson
@ 2013-04-04 22:56 ` Richard Henderson
2013-04-15 8:14 ` Aurelien Jarno
2013-04-13 12:24 ` [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Aurelien Jarno
33 siblings, 1 reply; 48+ messages in thread
From: Richard Henderson @ 2013-04-04 22:56 UTC (permalink / raw)
To: qemu-devel; +Cc: av1474, Aurelien Jarno
The TCG optimizer does great work when inserting constants, being able
to fold the open-coded deposit expansion to just an AND or an OR. Partially
avoid the regression caused by having the deposit opcode by expanding a
deposit of zero as an AND.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/ppc64/tcg-target.c | 22 ++++++++++++++++------
1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 9583cf9..772f4ac 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1928,12 +1928,22 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_deposit_i32:
- tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
- 32 - args[3] - args[4], 31 - args[3]);
+ if (const_args[2]) {
+ uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
+ tcg_out_andi32(s, args[0], args[0], ~mask);
+ } else {
+ tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
+ 32 - args[3] - args[4], 31 - args[3]);
+ }
break;
case INDEX_op_deposit_i64:
- tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
- 64 - args[3] - args[4]);
+ if (const_args[2]) {
+ uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
+ tcg_out_andi64(s, args[0], args[0], ~mask);
+ } else {
+ tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
+ 64 - args[3] - args[4]);
+ }
break;
case INDEX_op_movcond_i32:
@@ -2136,8 +2146,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_bswap32_i64, { "r", "r" } },
{ INDEX_op_bswap64_i64, { "r", "r" } },
- { INDEX_op_deposit_i32, { "r", "0", "r" } },
- { INDEX_op_deposit_i64, { "r", "0", "r" } },
+ { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+ { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
{ INDEX_op_add2_i64, { "r", "r", "r", "rI", "r", "rZM" } },
{ INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } },
--
1.8.1.4
^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 33/33] tcg-ppc64: Handle deposit of zero
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 33/33] tcg-ppc64: Handle deposit of zero Richard Henderson
@ 2013-04-15 8:14 ` Aurelien Jarno
0 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-15 8:14 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:56:26PM -0500, Richard Henderson wrote:
> The TCG optimizer does great work when inserting constants, being able
> to fold the open-coded deposit expansion to just an AND or an OR. Avoid
> a bit the regression caused by having the deposit opcode by expanding
> deposit of zero as an AND.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> tcg/ppc64/tcg-target.c | 22 ++++++++++++++++------
> 1 file changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
> index 9583cf9..772f4ac 100644
> --- a/tcg/ppc64/tcg-target.c
> +++ b/tcg/ppc64/tcg-target.c
> @@ -1928,12 +1928,22 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
> break;
>
> case INDEX_op_deposit_i32:
> - tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
> - 32 - args[3] - args[4], 31 - args[3]);
> + if (const_args[2]) {
> + uint32_t mask = ((2u << (args[4] - 1)) - 1) << args[3];
> + tcg_out_andi32(s, args[0], args[0], ~mask);
> + } else {
> + tcg_out_rlw(s, RLWIMI, args[0], args[2], args[3],
> + 32 - args[3] - args[4], 31 - args[3]);
> + }
> break;
> case INDEX_op_deposit_i64:
> - tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
> - 64 - args[3] - args[4]);
> + if (const_args[2]) {
> + uint64_t mask = ((2ull << (args[4] - 1)) - 1) << args[3];
> + tcg_out_andi64(s, args[0], args[0], ~mask);
> + } else {
> + tcg_out_rld(s, RLDIMI, args[0], args[2], args[3],
> + 64 - args[3] - args[4]);
> + }
> break;
>
> case INDEX_op_movcond_i32:
> @@ -2136,8 +2146,8 @@ static const TCGTargetOpDef ppc_op_defs[] = {
> { INDEX_op_bswap32_i64, { "r", "r" } },
> { INDEX_op_bswap64_i64, { "r", "r" } },
>
> - { INDEX_op_deposit_i32, { "r", "0", "r" } },
> - { INDEX_op_deposit_i64, { "r", "0", "r" } },
> + { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
> + { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
>
> { INDEX_op_add2_i64, { "r", "r", "r", "rI", "r", "rZM" } },
> { INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } },
I first thought this should go into the middle end, but OTOH it will
de-optimize some TCG targets which have a zero register like MIPS.
In the long term I think we should allow the middle end to query the
backend for constraints, and take the right decision (which would also
improve constant propagation on some hosts). In the meantime it looks
like the right thing to do, so:
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64
2013-04-04 22:55 [Qemu-devel] [PATCH v4 00/33] Modernize tcg/ppc64 Richard Henderson
` (32 preceding siblings ...)
2013-04-04 22:56 ` [Qemu-devel] [PATCH v4 33/33] tcg-ppc64: Handle deposit of zero Richard Henderson
@ 2013-04-13 12:24 ` Aurelien Jarno
33 siblings, 0 replies; 48+ messages in thread
From: Aurelien Jarno @ 2013-04-13 12:24 UTC (permalink / raw)
To: Richard Henderson; +Cc: av1474, qemu-devel
On Thu, Apr 04, 2013 at 05:55:53PM -0500, Richard Henderson wrote:
> Changes v3-v4:
>
> There were two problems, both related to the setcond opcode:
>
> * The first was pre-existing: the current code gets 32-bit NE wrong,
> failing to ignore the high 32-bits of the register. This problem
> caused the rest of the patch set to be non-bisectable -- any non-
> trivial change to code generation of the 32-bit opcodes tended to
> expose this.
>
> * The second was in the setcond rewrite, in that my logic was wrong
> for the signed comparisons.
>
> In the process of tracking all of this down, I split up the setcond
> changes into three parts, and dropped the carry-bit optimization part
> entirely. It may still be an improvement for the unsigned comparisons,
> but since I have switched to MFOCRF it'll be harder to show a speedup
> in practice.
>
> Finally, one extra patch to avoid a regression in code generation when
> inserting zero via deposit.
>
> Tested with arm-test, sparc-test, and i386-softmmu nbench.
>
> Tree updated at
>
> git://github.com/rth7680/qemu.git tcg-ppc64
>
Oops, it looks like I reviewed v3 while testing the version in your
git tree, which is v4.
Therefore my comments about the tests I have done apply to this version,
and I'll review the remaining patches of this version over the week-end.
Tested-by: Aurelien Jarno <aurelien@aurel32.net>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
aurelien@aurel32.net http://www.aurel32.net
^ permalink raw reply [flat|nested] 48+ messages in thread