* [RFC PATCH] tcg: allow a target to request canonicalization of SUBI to ADDI
@ 2023-10-25 18:59 Paolo Bonzini
2023-10-25 20:42 ` Richard Henderson
0 siblings, 1 reply; 2+ messages in thread
From: Paolo Bonzini @ 2023-10-25 18:59 UTC (permalink / raw)
To: qemu-devel; +Cc: richard.henderson
On x86, this is more efficient because it enables generation of
more LEA instructions.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/tcg/tcg.h | 4 ++++
tcg/i386/tcg-target.h | 2 ++
tcg/tcg-op.c | 20 ++++++++++++++++----
3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index a9282cdcc60..48e5aeef173 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -109,6 +109,10 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_sub2_i32 1
#endif
+#ifndef TCG_TARGET_PREFERS_addi
+#define TCG_TARGET_PREFERS_addi 0
+#endif
+
#ifndef TCG_TARGET_deposit_i32_valid
#define TCG_TARGET_deposit_i32_valid(ofs, len) 1
#endif
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 8417ea4899e..452c6eba2d6 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -227,6 +227,8 @@ typedef enum {
#define TCG_TARGET_HAS_bitsel_vec have_avx512vl
#define TCG_TARGET_HAS_cmpsel_vec -1
+#define TCG_TARGET_PREFERS_addi 1
+
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
(((ofs) == 0 && ((len) == 8 || (len) == 16)) || \
(TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 828eb9ee460..48c667032de 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -151,6 +151,8 @@ void tcg_gen_subi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
/* some cases can be optimized here */
if (arg2 == 0) {
tcg_gen_mov_i32(ret, arg1);
+ } else if (TCG_TARGET_PREFERS_addi) {
+ tcg_gen_add_i32(ret, arg1, tcg_constant_i32(-arg2));
} else {
tcg_gen_sub_i32(ret, arg1, tcg_constant_i32(arg2));
}
@@ -1369,11 +1371,21 @@ void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
if (arg2 == 0) {
tcg_gen_mov_i64(ret, arg1);
} else if (TCG_TARGET_REG_BITS == 64) {
- tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
+ if (TCG_TARGET_PREFERS_addi) {
+ tcg_gen_add_i64(ret, arg1, tcg_constant_i64(-arg2));
+ } else {
+ tcg_gen_sub_i64(ret, arg1, tcg_constant_i64(arg2));
+ }
} else {
- tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
- TCGV_LOW(arg1), TCGV_HIGH(arg1),
- tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
+ if (TCG_TARGET_PREFERS_addi) {
+ tcg_gen_add2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ tcg_constant_i32(-arg2), tcg_constant_i32(-arg2 >> 32));
+ } else {
+ tcg_gen_sub2_i32(TCGV_LOW(ret), TCGV_HIGH(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ tcg_constant_i32(arg2), tcg_constant_i32(arg2 >> 32));
+ }
}
}
--
2.41.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [RFC PATCH] tcg: allow a target to request canonicalization of SUBI to ADDI
2023-10-25 18:59 [RFC PATCH] tcg: allow a target to request canonicalization of SUBI to ADDI Paolo Bonzini
@ 2023-10-25 20:42 ` Richard Henderson
0 siblings, 0 replies; 2+ messages in thread
From: Richard Henderson @ 2023-10-25 20:42 UTC (permalink / raw)
To: Paolo Bonzini, qemu-devel
On 10/25/23 11:59, Paolo Bonzini wrote:
> On x86, this is more efficient because it enables generation of
> more LEA instructions.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
> include/tcg/tcg.h | 4 ++++
> tcg/i386/tcg-target.h | 2 ++
> tcg/tcg-op.c | 20 ++++++++++++++++----
> 3 files changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
> index a9282cdcc60..48e5aeef173 100644
> --- a/include/tcg/tcg.h
> +++ b/include/tcg/tcg.h
> @@ -109,6 +109,10 @@ typedef uint64_t TCGRegSet;
> #define TCG_TARGET_HAS_sub2_i32 1
> #endif
>
> +#ifndef TCG_TARGET_PREFERS_addi
> +#define TCG_TARGET_PREFERS_addi 0
> +#endif
I would rather do this unconditionally.
Many of the tcg backends do this manually, e.g. s390x:
case INDEX_op_sub_i64:
a0 = args[0], a1 = args[1], a2 = args[2];
if (const_args[2]) {
a2 = -a2;
goto do_addi_64;
} else {
tcg_out_insn(s, RRFa, SGRK, a0, a1, a2);
}
break;
While we could do something similar for i386, it would be better to not require such hoops
in each backend.
We would also want to perform this transformation in optimize.c.
r~
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2023-10-25 20:43 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-25 18:59 [RFC PATCH] tcg: allow a target to request canonicalization of SUBI to ADDI Paolo Bonzini
2023-10-25 20:42 ` Richard Henderson
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).