From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: Daniel Henrique Barboza <danielhb413@gmail.com>
Subject: [PULL 25/48] tcg/ppc: Implement negsetcond_*
Date: Wed, 23 Aug 2023 13:23:03 -0700 [thread overview]
Message-ID: <20230823202326.1353645-26-richard.henderson@linaro.org> (raw)
In-Reply-To: <20230823202326.1353645-1-richard.henderson@linaro.org>
In the general case we simply negate. However with isel we
may load -1 instead of 1 with no extra effort.
Consolidate EQ0 and NE0 logic. Replace the NE0 zero-extension
with inversion+negation of EQ0, which is never worse and may
eliminate one insn. Provide a special case for -EQ0.
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/ppc/tcg-target.h | 4 +-
tcg/ppc/tcg-target.c.inc | 127 ++++++++++++++++++++++++---------------
2 files changed, 82 insertions(+), 49 deletions(-)
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index a2ca0b44ce..8bfb14998e 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -97,7 +97,7 @@ typedef enum {
#define TCG_TARGET_HAS_sextract_i32 0
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_movcond_i32 1
-#define TCG_TARGET_HAS_negsetcond_i32 0
+#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_muluh_i32 1
@@ -135,7 +135,7 @@ typedef enum {
#define TCG_TARGET_HAS_sextract_i64 0
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_movcond_i64 1
-#define TCG_TARGET_HAS_negsetcond_i64 0
+#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 0
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 511e14b180..10448aa0e6 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -1548,8 +1548,20 @@ static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
}
static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
- TCGReg dst, TCGReg src)
+ TCGReg dst, TCGReg src, bool neg)
{
+ if (neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
+ /*
+ * X != 0 implies X + -1 generates a carry.
+ * RT = (~X + X) + CA
+ * = -1 + CA
+ * = CA ? 0 : -1
+ */
+ tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
+ tcg_out32(s, SUBFE | TAB(dst, src, src));
+ return;
+ }
+
if (type == TCG_TYPE_I32) {
tcg_out32(s, CNTLZW | RS(src) | RA(dst));
tcg_out_shri32(s, dst, dst, 5);
@@ -1557,18 +1569,28 @@ static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
tcg_out32(s, CNTLZD | RS(src) | RA(dst));
tcg_out_shri64(s, dst, dst, 6);
}
+ if (neg) {
+ tcg_out32(s, NEG | RT(dst) | RA(dst));
+ }
}
-static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
+static void tcg_out_setcond_ne0(TCGContext *s, TCGType type,
+ TCGReg dst, TCGReg src, bool neg)
{
- /* X != 0 implies X + -1 generates a carry. Extra addition
- trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C. */
- if (dst != src) {
- tcg_out32(s, ADDIC | TAI(dst, src, -1));
- tcg_out32(s, SUBFE | TAB(dst, dst, src));
- } else {
+ if (!neg && (TCG_TARGET_REG_BITS == 32 || type == TCG_TYPE_I64)) {
+ /*
+ * X != 0 implies X + -1 generates a carry. Extra addition
+ * trickery means: R = ~(X-1) + X + C = -X + X + C = C.
+ */
tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
+ return;
+ }
+ tcg_out_setcond_eq0(s, type, dst, src, false);
+ if (neg) {
+ tcg_out32(s, ADDI | TAI(dst, dst, -1));
+ } else {
+ tcg_out_xori32(s, dst, dst, 1);
}
}
@@ -1590,9 +1612,10 @@ static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
TCGArg arg0, TCGArg arg1, TCGArg arg2,
- int const_arg2)
+ int const_arg2, bool neg)
{
- int crop, sh;
+ int sh;
+ bool inv;
tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
@@ -1605,14 +1628,10 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
if (arg2 == 0) {
switch (cond) {
case TCG_COND_EQ:
- tcg_out_setcond_eq0(s, type, arg0, arg1);
+ tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
return;
case TCG_COND_NE:
- if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
- tcg_out_ext32u(s, TCG_REG_R0, arg1);
- arg1 = TCG_REG_R0;
- }
- tcg_out_setcond_ne0(s, arg0, arg1);
+ tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
return;
case TCG_COND_GE:
tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
@@ -1621,9 +1640,17 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
case TCG_COND_LT:
/* Extract the sign bit. */
if (type == TCG_TYPE_I32) {
- tcg_out_shri32(s, arg0, arg1, 31);
+ if (neg) {
+ tcg_out_sari32(s, arg0, arg1, 31);
+ } else {
+ tcg_out_shri32(s, arg0, arg1, 31);
+ }
} else {
- tcg_out_shri64(s, arg0, arg1, 63);
+ if (neg) {
+ tcg_out_sari64(s, arg0, arg1, 63);
+ } else {
+ tcg_out_shri64(s, arg0, arg1, 63);
+ }
}
return;
default:
@@ -1641,7 +1668,7 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
isel = tcg_to_isel[cond];
- tcg_out_movi(s, type, arg0, 1);
+ tcg_out_movi(s, type, arg0, neg ? -1 : 1);
if (isel & 1) {
/* arg0 = (bc ? 0 : 1) */
tab = TAB(arg0, 0, arg0);
@@ -1655,51 +1682,47 @@ static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
return;
}
+ inv = false;
switch (cond) {
case TCG_COND_EQ:
arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
- tcg_out_setcond_eq0(s, type, arg0, arg1);
- return;
+ tcg_out_setcond_eq0(s, type, arg0, arg1, neg);
+ break;
case TCG_COND_NE:
arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
- /* Discard the high bits only once, rather than both inputs. */
- if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
- tcg_out_ext32u(s, TCG_REG_R0, arg1);
- arg1 = TCG_REG_R0;
- }
- tcg_out_setcond_ne0(s, arg0, arg1);
- return;
+ tcg_out_setcond_ne0(s, type, arg0, arg1, neg);
+ break;
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ inv = true;
+ /* fall through */
case TCG_COND_GT:
case TCG_COND_GTU:
- sh = 30;
- crop = 0;
- goto crtest;
-
- case TCG_COND_LT:
- case TCG_COND_LTU:
- sh = 29;
- crop = 0;
+ sh = 30; /* CR7 CR_GT */
goto crtest;
case TCG_COND_GE:
case TCG_COND_GEU:
- sh = 31;
- crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
+ inv = true;
+ /* fall through */
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ sh = 29; /* CR7 CR_LT */
goto crtest;
- case TCG_COND_LE:
- case TCG_COND_LEU:
- sh = 31;
- crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
crtest:
tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
- if (crop) {
- tcg_out32(s, crop);
- }
tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
+ if (neg && inv) {
+ tcg_out32(s, ADDI | TAI(arg0, arg0, -1));
+ } else if (neg) {
+ tcg_out32(s, NEG | RT(arg0) | RA(arg0));
+ } else if (inv) {
+ tcg_out_xori32(s, arg0, arg0, 1);
+ }
break;
default:
@@ -2982,11 +3005,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_setcond_i32:
tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
- const_args[2]);
+ const_args[2], false);
break;
case INDEX_op_setcond_i64:
tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
- const_args[2]);
+ const_args[2], false);
+ break;
+ case INDEX_op_negsetcond_i32:
+ tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2],
+ const_args[2], true);
+ break;
+ case INDEX_op_negsetcond_i64:
+ tcg_out_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2],
+ const_args[2], true);
break;
case INDEX_op_setcond2_i32:
tcg_out_setcond2(s, args, const_args);
@@ -3724,6 +3755,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_rotl_i32:
case INDEX_op_rotr_i32:
case INDEX_op_setcond_i32:
+ case INDEX_op_negsetcond_i32:
case INDEX_op_and_i64:
case INDEX_op_andc_i64:
case INDEX_op_shl_i64:
@@ -3732,6 +3764,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
case INDEX_op_rotl_i64:
case INDEX_op_rotr_i64:
case INDEX_op_setcond_i64:
+ case INDEX_op_negsetcond_i64:
return C_O1_I2(r, r, ri);
case INDEX_op_mul_i32:
--
2.34.1
next prev parent reply other threads:[~2023-08-23 20:28 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-08-23 20:22 [PULL 00/48] tcg patch queue Richard Henderson
2023-08-23 20:22 ` [PULL 01/48] accel/kvm: Widen pc/saved_insn for kvm_sw_breakpoint Richard Henderson
2023-08-23 20:22 ` [PULL 02/48] accel/hvf: Widen pc/saved_insn for hvf_sw_breakpoint Richard Henderson
2023-08-23 20:22 ` [PULL 03/48] sysemu/kvm: Use vaddr for kvm_arch_[insert|remove]_hw_breakpoint Richard Henderson
2023-08-23 20:22 ` [PULL 04/48] sysemu/hvf: Use vaddr for hvf_arch_[insert|remove]_hw_breakpoint Richard Henderson
2023-08-23 20:22 ` [PULL 05/48] include/exec: Replace target_ulong with abi_ptr in cpu_[st|ld]*() Richard Henderson
2023-08-23 20:22 ` [PULL 06/48] include/exec: typedef abi_ptr to vaddr in softmmu Richard Henderson
2023-08-23 20:22 ` [PULL 07/48] include/exec: Widen tlb_hit/tlb_hit_page() Richard Henderson
2023-08-23 20:22 ` [PULL 08/48] accel/tcg: Widen address arg in tlb_compare_set() Richard Henderson
2023-08-23 20:22 ` [PULL 09/48] accel/tcg: Update run_on_cpu_data static assert Richard Henderson
2023-08-23 20:22 ` [PULL 10/48] target/m68k: Use tcg_gen_deposit_i32 in gen_partset_reg Richard Henderson
2023-08-23 20:22 ` [PULL 11/48] tcg/i386: Drop BYTEH deposits for 64-bit Richard Henderson
2023-08-23 20:22 ` [PULL 12/48] tcg: Fold deposit with zero to and Richard Henderson
2023-08-23 20:22 ` [PULL 13/48] tcg/i386: Allow immediate as input to deposit_* Richard Henderson
2023-08-23 20:22 ` [PULL 14/48] docs/devel/tcg-ops: Bury mentions of trunc_shr_i64_i32() Richard Henderson
2023-08-23 20:22 ` [PULL 15/48] tcg: Unify TCG_TARGET_HAS_extr[lh]_i64_i32 Richard Henderson
2023-08-23 20:22 ` [PULL 16/48] tcg: Introduce negsetcond opcodes Richard Henderson
2023-08-23 20:22 ` [PULL 17/48] tcg: Use tcg_gen_negsetcond_* Richard Henderson
2023-08-23 20:22 ` [PULL 18/48] target/alpha: Use tcg_gen_movcond_i64 in gen_fold_mzero Richard Henderson
2023-08-23 20:22 ` [PULL 19/48] target/arm: Use tcg_gen_negsetcond_* Richard Henderson
2023-08-23 20:22 ` [PULL 20/48] target/m68k: " Richard Henderson
2023-08-23 20:22 ` [PULL 21/48] target/openrisc: " Richard Henderson
2023-08-23 20:23 ` [PULL 22/48] target/ppc: " Richard Henderson
2023-08-23 20:23 ` [PULL 23/48] target/sparc: Use tcg_gen_movcond_i64 in gen_edge Richard Henderson
2023-08-23 20:23 ` [PULL 24/48] target/tricore: Replace gen_cond_w with tcg_gen_negsetcond_tl Richard Henderson
2023-08-23 20:23 ` Richard Henderson [this message]
2023-08-23 20:23 ` [PULL 26/48] tcg/ppc: Use the Set Boolean Extension Richard Henderson
2023-08-23 20:23 ` [PULL 27/48] tcg/aarch64: Implement negsetcond_* Richard Henderson
2023-08-23 20:23 ` [PULL 28/48] tcg/arm: Implement negsetcond_i32 Richard Henderson
2023-08-23 20:23 ` [PULL 29/48] tcg/riscv: Implement negsetcond_* Richard Henderson
2023-08-23 20:23 ` [PULL 30/48] tcg/s390x: " Richard Henderson
2023-08-23 20:23 ` [PULL 31/48] tcg/sparc64: " Richard Henderson
2023-08-23 20:23 ` [PULL 32/48] tcg/i386: Merge tcg_out_brcond{32,64} Richard Henderson
2023-08-23 20:23 ` [PULL 33/48] tcg/i386: Merge tcg_out_setcond{32,64} Richard Henderson
2023-08-23 20:23 ` [PULL 34/48] tcg/i386: Merge tcg_out_movcond{32,64} Richard Henderson
2023-08-23 20:23 ` [PULL 35/48] tcg/i386: Use CMP+SBB in tcg_out_setcond Richard Henderson
2023-08-23 20:23 ` [PULL 36/48] tcg/i386: Clear dest first in tcg_out_setcond if possible Richard Henderson
2023-08-23 20:23 ` [PULL 37/48] tcg/i386: Use shift in tcg_out_setcond Richard Henderson
2023-08-23 20:23 ` [PULL 38/48] tcg/i386: Implement negsetcond_* Richard Henderson
2023-08-23 20:23 ` [PULL 39/48] tcg/tcg-op: Document bswap16_i32() byte pattern Richard Henderson
2023-08-23 20:23 ` [PULL 40/48] tcg/tcg-op: Document bswap16_i64() " Richard Henderson
2023-08-23 20:23 ` [PULL 41/48] tcg/tcg-op: Document bswap32_i32() " Richard Henderson
2023-08-23 20:23 ` [PULL 42/48] tcg/tcg-op: Document bswap32_i64() " Richard Henderson
2023-08-23 20:23 ` [PULL 43/48] tcg/tcg-op: Document bswap64_i64() " Richard Henderson
2023-08-23 20:23 ` [PULL 44/48] tcg/tcg-op: Document hswap_i32/64() " Richard Henderson
2023-08-23 20:23 ` [PULL 45/48] tcg/tcg-op: Document wswap_i64() " Richard Henderson
2023-08-23 20:23 ` [PULL 46/48] target/cris: Fix a typo in gen_swapr() Richard Henderson
2023-08-23 20:23 ` [PULL 47/48] docs/devel/tcg-ops: fix missing newlines in "Host vector operations" Richard Henderson
2023-08-23 20:23 ` [PULL 48/48] tcg: spelling fixes Richard Henderson
2023-08-24 14:05 ` [PULL 00/48] tcg patch queue Stefan Hajnoczi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230823202326.1353645-26-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=danielhb413@gmail.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).