* [Qemu-devel] [PATCH v2 1/6] Add TCG optimizations stub
2011-06-09 10:45 [Qemu-devel] [PATCH v2 0/6] Implement constant folding and copy propagation in TCG Kirill Batuzov
@ 2011-06-09 10:45 ` Kirill Batuzov
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 2/6] Add copy and constant propagation Kirill Batuzov
` (5 subsequent siblings)
6 siblings, 0 replies; 13+ messages in thread
From: Kirill Batuzov @ 2011-06-09 10:45 UTC (permalink / raw)
To: qemu-devel; +Cc: zhur
Added file tcg/optimize.c to hold TCG optimizations. Function tcg_optimize
is called from tcg_gen_code_common. It calls other functions performing
specific optimizations. Stub for constant folding was added.
Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---
Makefile.target | 2 +-
tcg/optimize.c | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
tcg/tcg.c | 6 ++++
tcg/tcg.h | 3 ++
4 files changed, 101 insertions(+), 1 deletions(-)
create mode 100644 tcg/optimize.c
diff --git a/Makefile.target b/Makefile.target
index 2e281a4..0b045ce 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -70,7 +70,7 @@ all: $(PROGS) stap
#########################################################
# cpu emulator library
libobj-y = exec.o translate-all.o cpu-exec.o translate.o
-libobj-y += tcg/tcg.o
+libobj-y += tcg/tcg.o tcg/optimize.o
libobj-$(CONFIG_SOFTFLOAT) += fpu/softfloat.o
libobj-$(CONFIG_NOSOFTFLOAT) += fpu/softfloat-native.o
libobj-y += op_helper.o helper.o
diff --git a/tcg/optimize.c b/tcg/optimize.c
new file mode 100644
index 0000000..5daf131
--- /dev/null
+++ b/tcg/optimize.c
@@ -0,0 +1,91 @@
+/*
+ * Optimizations for Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2010 Samsung Electronics.
+ * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "qemu-common.h"
+#include "tcg-op.h"
+
+static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
+ TCGArg *args, TCGOpDef *tcg_op_defs)
+{
+ int i, nb_ops, op_index, op, nb_temps, nb_globals;
+ const TCGOpDef *def;
+ TCGArg *gen_args;
+
+ nb_temps = s->nb_temps;
+ nb_globals = s->nb_globals;
+
+ nb_ops = tcg_opc_ptr - gen_opc_buf;
+ gen_args = args;
+ for (op_index = 0; op_index < nb_ops; op_index++) {
+ op = gen_opc_buf[op_index];
+ def = &tcg_op_defs[op];
+ switch (op) {
+ case INDEX_op_call:
+ i = (args[0] >> 16) + (args[0] & 0xffff) + 3;
+ while (i) {
+ *gen_args = *args;
+ args++;
+ gen_args++;
+ i--;
+ }
+ break;
+ case INDEX_op_set_label:
+ case INDEX_op_jmp:
+ case INDEX_op_br:
+ case INDEX_op_brcond_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_brcond_i64:
+#endif
+ for (i = 0; i < def->nb_args; i++) {
+ *gen_args = *args;
+ args++;
+ gen_args++;
+ }
+ break;
+ default:
+ for (i = 0; i < def->nb_args; i++) {
+ gen_args[i] = args[i];
+ }
+ args += def->nb_args;
+ gen_args += def->nb_args;
+ break;
+ }
+ }
+
+ return gen_args;
+}
+
+TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
+ TCGArg *args, TCGOpDef *tcg_op_defs)
+{
+ TCGArg *res;
+ res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
+ return res;
+}
diff --git a/tcg/tcg.c b/tcg/tcg.c
index fad92f9..6309dce 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -24,6 +24,7 @@
/* define it to use liveness analysis (better code) */
#define USE_LIVENESS_ANALYSIS
+#define USE_TCG_OPTIMIZATIONS
#include "config.h"
@@ -2033,6 +2034,11 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
}
#endif
+#ifdef USE_TCG_OPTIMIZATIONS
+ gen_opparam_ptr =
+ tcg_optimize(s, gen_opc_ptr, gen_opparam_buf, tcg_op_defs);
+#endif
+
#ifdef CONFIG_PROFILER
s->la_time -= profile_getclock();
#endif
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 2b985ac..91a3cda 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -486,6 +486,9 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
int c, int right, int arith);
+TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args,
+ TCGOpDef *tcg_op_def);
+
/* only used for debugging purposes */
void tcg_register_helper(void *func, const char *name);
const char *tcg_helper_get_name(TCGContext *s, void *func);
--
1.7.4.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* [Qemu-devel] [PATCH v2 2/6] Add copy and constant propagation.
2011-06-09 10:45 [Qemu-devel] [PATCH v2 0/6] Implement constant folding and copy propagation in TCG Kirill Batuzov
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 1/6] Add TCG optimizations stub Kirill Batuzov
@ 2011-06-09 10:45 ` Kirill Batuzov
2011-06-10 17:44 ` Richard Henderson
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 3/6] Do constant folding for basic arithmetic operations Kirill Batuzov
` (4 subsequent siblings)
6 siblings, 1 reply; 13+ messages in thread
From: Kirill Batuzov @ 2011-06-09 10:45 UTC (permalink / raw)
To: qemu-devel; +Cc: zhur
Make tcg_constant_folding do copy and constant propagation. It is a
preparational work before actual constant folding.
Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---
tcg/optimize.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 161 insertions(+), 0 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 5daf131..7996798 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -31,23 +31,178 @@
#include "qemu-common.h"
#include "tcg-op.h"
+typedef enum {
+ TCG_TEMP_UNDEF = 0,
+ TCG_TEMP_CONST,
+ TCG_TEMP_COPY,
+ TCG_TEMP_ANY
+} tcg_temp_state;
+
+struct tcg_temp_info {
+ tcg_temp_state state;
+ uint16_t num_copies;
+ tcg_target_ulong val;
+};
+
+/* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
+ class of equivalent temp's, a new representative should be chosen in this
+ class. */
+static void reset_temp(struct tcg_temp_info *temps, TCGArg temp, int nb_temps,
+ int nb_globals)
+{
+ int i;
+ TCGArg new_base;
+ if (temps[temp].state == TCG_TEMP_COPY) {
+ temps[temps[temp].val].num_copies--;
+ }
+ if (temps[temp].num_copies > 0) {
+ new_base = (TCGArg)-1;
+ for (i = nb_globals; i < nb_temps; i++) {
+ if (temps[i].state == TCG_TEMP_COPY && temps[i].val == temp) {
+ if (new_base == ((TCGArg)-1)) {
+ new_base = (TCGArg)i;
+ temps[i].state = TCG_TEMP_ANY;
+ temps[i].num_copies = 0;
+ } else {
+ temps[i].val = new_base;
+ temps[new_base].num_copies++;
+ }
+ }
+ }
+ for (i = 0; i < nb_globals; i++) {
+ if (temps[i].state == TCG_TEMP_COPY && temps[i].val == temp) {
+ if (new_base == ((TCGArg)-1)) {
+ temps[i].state = TCG_TEMP_ANY;
+ temps[i].num_copies = 0;
+ } else {
+ temps[i].val = new_base;
+ temps[new_base].num_copies++;
+ }
+ }
+ }
+ }
+ temps[temp].state = TCG_TEMP_ANY;
+ temps[temp].num_copies = 0;
+}
+
+static int op_bits(int op)
+{
+ switch (op) {
+ case INDEX_op_mov_i32:
+ return 32;
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_mov_i64:
+ return 64;
+#endif
+ default:
+ fprintf(stderr, "Unrecognized operation %d in op_bits.\n", op);
+ tcg_abort();
+ }
+}
+
+static int op_to_movi(int op)
+{
+ if (op_bits(op) == 32) {
+ return INDEX_op_movi_i32;
+ }
+#if TCG_TARGET_REG_BITS == 64
+ if (op_bits(op) == 64) {
+ return INDEX_op_movi_i64;
+ }
+#endif
+ tcg_abort();
+}
+
+/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
TCGArg *args, TCGOpDef *tcg_op_defs)
{
int i, nb_ops, op_index, op, nb_temps, nb_globals;
const TCGOpDef *def;
TCGArg *gen_args;
+ /* Array VALS has an element for each temp.
+ If this temp holds a constant then its value is kept in VALS' element.
+ If this temp is a copy of other ones then this equivalence class'
+ representative is kept in VALS' element.
+ If this temp is neither copy nor constant then corresponding VALS'
+ element is unused. */
+ struct tcg_temp_info temps[TCG_MAX_TEMPS];
nb_temps = s->nb_temps;
nb_globals = s->nb_globals;
+ memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
nb_ops = tcg_opc_ptr - gen_opc_buf;
gen_args = args;
for (op_index = 0; op_index < nb_ops; op_index++) {
op = gen_opc_buf[op_index];
def = &tcg_op_defs[op];
+ /* Do copy propagation */
+ if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
+ assert(op != INDEX_op_call);
+ for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
+ if (temps[args[i]].state == TCG_TEMP_COPY
+ && !(def->args_ct[i].ct & TCG_CT_IALIAS)
+ && (def->args_ct[i].ct & TCG_CT_REG)) {
+ args[i] = temps[args[i]].val;
+ }
+ }
+ }
+
+ /* Propagate constants through copy operations and do constant
+ folding. Constants will be substituted to arguments by register
+ allocator where needed and possible. Also detect copies. */
switch (op) {
+ case INDEX_op_mov_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_mov_i64:
+#endif
+ if ((temps[args[1]].state == TCG_TEMP_COPY
+ && temps[args[1]].val == args[0])
+ || args[0] == args[1]) {
+ args += 2;
+ gen_opc_buf[op_index] = INDEX_op_nop;
+ break;
+ }
+ if (temps[args[1]].state != TCG_TEMP_CONST) {
+ reset_temp(temps, args[0], nb_temps, nb_globals);
+ if (args[1] >= s->nb_globals) {
+ temps[args[0]].state = TCG_TEMP_COPY;
+ temps[args[0]].val = args[1];
+ temps[args[1]].num_copies++;
+ }
+ gen_args[0] = args[0];
+ gen_args[1] = args[1];
+ gen_args += 2;
+ args += 2;
+ break;
+ }
+ /* Source argument is constant. Rewrite the operation and
+ let movi case handle it. */
+ op = op_to_movi(op);
+ gen_opc_buf[op_index] = op;
+ args[1] = temps[args[1]].val;
+ /* fallthrough */
+ case INDEX_op_movi_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_movi_i64:
+#endif
+ reset_temp(temps, args[0], nb_temps, nb_globals);
+ temps[args[0]].state = TCG_TEMP_CONST;
+ temps[args[0]].val = args[1];
+ assert(temps[args[0]].num_copies == 0);
+ gen_args[0] = args[0];
+ gen_args[1] = args[1];
+ gen_args += 2;
+ args += 2;
+ break;
case INDEX_op_call:
+ for (i = 0; i < nb_globals; i++) {
+ reset_temp(temps, i, nb_temps, nb_globals);
+ }
+ for (i = 0; i < (args[0] >> 16); i++) {
+ reset_temp(temps, args[i + 1], nb_temps, nb_globals);
+ }
i = (args[0] >> 16) + (args[0] & 0xffff) + 3;
while (i) {
*gen_args = *args;
@@ -63,6 +218,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_brcond_i64:
#endif
+ memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
for (i = 0; i < def->nb_args; i++) {
*gen_args = *args;
args++;
@@ -70,6 +226,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
break;
default:
+ /* Default case: we do know nothing about operation so no
+ propagation is done. We only trash output args. */
+ for (i = 0; i < def->nb_oargs; i++) {
+ reset_temp(temps, args[i], nb_temps, nb_globals);
+ }
for (i = 0; i < def->nb_args; i++) {
gen_args[i] = args[i];
}
--
1.7.4.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [Qemu-devel] [PATCH v2 2/6] Add copy and constant propagation.
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 2/6] Add copy and constant propagation Kirill Batuzov
@ 2011-06-10 17:44 ` Richard Henderson
2011-07-07 12:36 ` Kirill Batuzov
0 siblings, 1 reply; 13+ messages in thread
From: Richard Henderson @ 2011-06-10 17:44 UTC (permalink / raw)
To: Kirill Batuzov; +Cc: qemu-devel, zhur
On 06/09/2011 03:45 AM, Kirill Batuzov wrote:
> Make tcg_constant_folding do copy and constant propagation. It is a
> preparational work before actual constant folding.
>
> Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
> ---
> tcg/optimize.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 files changed, 161 insertions(+), 0 deletions(-)
>
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index 5daf131..7996798 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -31,23 +31,178 @@
> #include "qemu-common.h"
> #include "tcg-op.h"
>
> +typedef enum {
> + TCG_TEMP_UNDEF = 0,
> + TCG_TEMP_CONST,
> + TCG_TEMP_COPY,
> + TCG_TEMP_ANY
> +} tcg_temp_state;
Coding conventions request StudlyCaps, fwiw.
> +
> +struct tcg_temp_info {
> + tcg_temp_state state;
> + uint16_t num_copies;
> + tcg_target_ulong val;
> +};
> +
> +/* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
> + class of equivalent temp's, a new representative should be chosen in this
> + class. */
> +static void reset_temp(struct tcg_temp_info *temps, TCGArg temp, int nb_temps,
> + int nb_globals)
> +{
> + int i;
> + TCGArg new_base;
> + if (temps[temp].state == TCG_TEMP_COPY) {
> + temps[temps[temp].val].num_copies--;
> + }
> + if (temps[temp].num_copies > 0) {
> + new_base = (TCGArg)-1;
> + for (i = nb_globals; i < nb_temps; i++) {
I think it's probably better to place the elements of the equiv class into
a double-linked circular list, rather than a mere num_copies that forces
you to iterate over nb_temps to remove an element. E.g.
struct tcg_temp_info {
tcg_temp_state state;
uint16_t prev_copy;
uint16_t next_copy;
tcg_target_ulong val;
};
This makes elements easy to remove, and easy to choose a new representative.
> +static int op_bits(int op)
> +{
> + switch (op) {
> + case INDEX_op_mov_i32:
> + return 32;
> +#if TCG_TARGET_REG_BITS == 64
> + case INDEX_op_mov_i64:
> + return 64;
> +#endif
> + default:
> + fprintf(stderr, "Unrecognized operation %d in op_bits.\n", op);
> + tcg_abort();
> + }
> +}
I think we would be well-served to move this to a property of the opcode,
much the same way as TCG_OPF_CALL_CLOBBER et al. I would not, of course,
suggest to block this patch series on that cleanup.
> +static int op_to_movi(int op)
> +{
> + if (op_bits(op) == 32) {
> + return INDEX_op_movi_i32;
> + }
> +#if TCG_TARGET_REG_BITS == 64
> + if (op_bits(op) == 64) {
> + return INDEX_op_movi_i64;
> + }
> +#endif
> + tcg_abort();
> +}
This should use a switch, not two calls to a function.
> + struct tcg_temp_info temps[TCG_MAX_TEMPS];
Given that gen_opc_buf is static, I see no reason not to make this a
static variable as well. Put it at file scope so you don't have to
pass it as arguments to subroutines.
> + /* Do copy propagation */
> + if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
Why are you suppressing copy propagation in this way? I see no reason for it.
> + assert(op != INDEX_op_call);
> + for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
> + if (temps[args[i]].state == TCG_TEMP_COPY
> + && !(def->args_ct[i].ct & TCG_CT_IALIAS)
>From looking at only ARM translator output, you might suppose that we've
already performed matching constraint substitution. But you'd be wrong.
I realize that at present you get a smidge smaller code by suppressing
substitution around matching constraints, but that's only because our
copy propagation is incomplete. From a pure theoretic stance, I think
this line is wrong. E.g.
With IALIAS suppression Without
---- 0x40802e50 ---- 0x40802e50
mov_i32 tmp5,r10 | nopn $0x2,$0x2
movi_i32 tmp6,$0x40802e58 movi_i32 tmp6,$0x40802e58
add_i32 tmp6,tmp5,tmp6 | add_i32 tmp6,r10,tmp6
mov_i32 r10,tmp6 mov_i32 r10,tmp6
I'll claim that that the right-hand column is better, even though it
currently forces the generation of an LEA insn instead of an ADD.
What's needed to fix this is either (1) a much better register allocator
or (2) a reverse copy-propagation pass that pushes global variables up
into outputs containing temporaries.
> + && (def->args_ct[i].ct & TCG_CT_REG)) {
This line looks redundant. Don't we assert this property
in tcg_add_target_add_op_defs? Certainly elsewhere we expect all
arguments to be able to accept a register...
> + case INDEX_op_mov_i32:
> +#if TCG_TARGET_REG_BITS == 64
> + case INDEX_op_mov_i64:
> +#endif
I suggest we take a page from tcg/sparc/tcg-target.c:
#if TCG_TARGET_REG_BITS == 64
#define CASE_OP_32_64(x) \
glue(glue(case INDEX_op_, x), _i32): \
glue(glue(case INDEX_op_, x), _i64)
#else
#define CASE_OP_32_64(x) \
glue(glue(case INDEX_op_, x), _i32)
#endif
Used as
CASE_OP_32_64(mov):
in order to avoid rampant ifdefs like this.
Unfortunately that also raises the spectre of the fact that almost
all of the logical operations are optional.
Again, probably not something to tackle within this patch series,
but I suggest that we no longer conditionally define the opcodes.
Unconditionally define them, but mark them with TCG_OPF_UNSUPPORTED,
do not generate them, and abort in final code generation if they're
seen. I think that would clean up a lot of if-deffery in and
around tcg/*.c.
> case INDEX_op_call:
> + for (i = 0; i < nb_globals; i++) {
> + reset_temp(temps, i, nb_temps, nb_globals);
> + }
No need to reset globals if the call is PURE or CONST.
> case INDEX_op_brcond_i64:
> #endif
> + memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
Perhaps to pedantic, but we can do better in extended basic blocks.
A full flush of all temps is only needed at labels.
r~
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [Qemu-devel] [PATCH v2 2/6] Add copy and constant propagation.
2011-06-10 17:44 ` Richard Henderson
@ 2011-07-07 12:36 ` Kirill Batuzov
0 siblings, 0 replies; 13+ messages in thread
From: Kirill Batuzov @ 2011-07-07 12:36 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel, zhur
On Fri, 10 Jun 2011, Richard Henderson wrote:
> > + /* Do copy propagation */
> > + if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
>
> Why are you suppressing copy propagation in this way? I see no reason for it.
>
This was suggested by Aurelien Jarno. I do not know how safe it is to
propagate copies through such operations so I decided to be
conservative.
> > case INDEX_op_brcond_i64:
> > #endif
> > + memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
>
> Perhaps to pedantic, but we can do better in extended basic blocks.
> A full flush of all temps is only needed at labels.
>
Not much better unfortunately. Globals got spilled at basic block end,
temps just die. The only things we can keep are locals but I have not
seen much of them in the intermediate representation.
----
Kirill Batuzov
^ permalink raw reply [flat|nested] 13+ messages in thread
* [Qemu-devel] [PATCH v2 3/6] Do constant folding for basic arithmetic operations.
2011-06-09 10:45 [Qemu-devel] [PATCH v2 0/6] Implement constant folding and copy propagation in TCG Kirill Batuzov
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 1/6] Add TCG optimizations stub Kirill Batuzov
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 2/6] Add copy and constant propagation Kirill Batuzov
@ 2011-06-09 10:45 ` Kirill Batuzov
2011-06-10 17:57 ` Richard Henderson
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 4/6] Do constant folding for boolean operations Kirill Batuzov
` (3 subsequent siblings)
6 siblings, 1 reply; 13+ messages in thread
From: Kirill Batuzov @ 2011-06-09 10:45 UTC (permalink / raw)
To: qemu-devel; +Cc: zhur
Perform actual constant folding for ADD, SUB and MUL operations.
Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---
tcg/optimize.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 156 insertions(+), 0 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 7996798..29da6fa 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -89,9 +89,15 @@ static int op_bits(int op)
{
switch (op) {
case INDEX_op_mov_i32:
+ case INDEX_op_add_i32:
+ case INDEX_op_sub_i32:
+ case INDEX_op_mul_i32:
return 32;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
+ case INDEX_op_add_i64:
+ case INDEX_op_sub_i64:
+ case INDEX_op_mul_i64:
return 64;
#endif
default:
@@ -113,6 +119,58 @@ static int op_to_movi(int op)
tcg_abort();
}
+static int op_to_mov(int op)
+{
+ if (op_bits(op) == 32) {
+ return INDEX_op_mov_i32;
+ }
+#if TCG_TARGET_REG_BITS == 64
+ if (op_bits(op) == 64) {
+ return INDEX_op_mov_i64;
+ }
+#endif
+ tcg_abort();
+}
+
+static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
+{
+ switch (op) {
+ case INDEX_op_add_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_add_i64:
+#endif
+ return x + y;
+
+ case INDEX_op_sub_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_sub_i64:
+#endif
+ return x - y;
+
+ case INDEX_op_mul_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_mul_i64:
+#endif
+ return x * y;
+
+ default:
+ fprintf(stderr,
+ "Unrecognized operation %d in do_constant_folding.\n", op);
+ tcg_abort();
+ }
+}
+
+static TCGArg do_constant_folding(int op, TCGArg x, TCGArg y)
+{
+ TCGArg res = do_constant_folding_2(op, x, y);
+#if TCG_TARGET_REG_BITS == 64
+ if (op_bits(op) == 32) {
+ res &= 0xffffffff;
+ }
+#endif
+ return res;
+}
+
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -120,6 +178,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
int i, nb_ops, op_index, op, nb_temps, nb_globals;
const TCGOpDef *def;
TCGArg *gen_args;
+ TCGArg tmp;
/* Array VALS has an element for each temp.
If this temp holds a constant then its value is kept in VALS' element.
If this temp is a copy of other ones then this equivalence class'
@@ -149,6 +208,72 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
}
+ /* Simplify expression if possible. */
+ switch (op) {
+ case INDEX_op_add_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_add_i64:
+#endif
+ if (temps[args[1]].state == TCG_TEMP_CONST) {
+ /* 0 + x == x + 0 */
+ tmp = args[1];
+ args[1] = args[2];
+ args[2] = tmp;
+ }
+ /* Fallthrough */
+ case INDEX_op_sub_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_sub_i64:
+#endif
+ if (temps[args[1]].state == TCG_TEMP_CONST) {
+ /* Proceed with possible constant folding. */
+ break;
+ }
+ if (temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0) {
+ if ((temps[args[0]].state == TCG_TEMP_COPY
+ && temps[args[0]].val == args[1])
+ || args[0] == args[1]) {
+ args += 3;
+ gen_opc_buf[op_index] = INDEX_op_nop;
+ } else {
+ reset_temp(temps, args[0], nb_temps, nb_globals);
+ if (args[1] >= s->nb_globals) {
+ temps[args[0]].state = TCG_TEMP_COPY;
+ temps[args[0]].val = args[1];
+ temps[args[1]].num_copies++;
+ }
+ gen_opc_buf[op_index] = op_to_mov(op);
+ gen_args[0] = args[0];
+ gen_args[1] = args[1];
+ gen_args += 2;
+ args += 3;
+ }
+ continue;
+ }
+ break;
+ case INDEX_op_mul_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_mul_i64:
+#endif
+ if ((temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == 0)
+ || (temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0)) {
+ reset_temp(temps, args[0], nb_temps, nb_globals);
+ temps[args[0]].state = TCG_TEMP_CONST;
+ temps[args[0]].val = 0;
+ assert(temps[args[0]].num_copies == 0);
+ gen_opc_buf[op_index] = op_to_movi(op);
+ gen_args[0] = args[0];
+ gen_args[1] = 0;
+ args += 3;
+ gen_args += 2;
+ continue;
+ }
+ break;
+ }
+
/* Propagate constants through copy operations and do constant
folding. Constants will be substituted to arguments by register
allocator where needed and possible. Also detect copies. */
@@ -196,6 +321,37 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_args += 2;
args += 2;
break;
+ case INDEX_op_add_i32:
+ case INDEX_op_sub_i32:
+ case INDEX_op_mul_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_add_i64:
+ case INDEX_op_sub_i64:
+ case INDEX_op_mul_i64:
+#endif
+ if (temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST) {
+ gen_opc_buf[op_index] = op_to_movi(op);
+ gen_args[0] = args[0];
+ gen_args[1] =
+ do_constant_folding(op, temps[args[1]].val,
+ temps[args[2]].val);
+ reset_temp(temps, gen_args[0], nb_temps, nb_globals);
+ temps[gen_args[0]].state = TCG_TEMP_CONST;
+ temps[gen_args[0]].val = gen_args[1];
+ assert(temps[gen_args[0]].num_copies == 0);
+ gen_args += 2;
+ args += 3;
+ break;
+ } else {
+ reset_temp(temps, args[0], nb_temps, nb_globals);
+ gen_args[0] = args[0];
+ gen_args[1] = args[1];
+ gen_args[2] = args[2];
+ gen_args += 3;
+ args += 3;
+ break;
+ }
case INDEX_op_call:
for (i = 0; i < nb_globals; i++) {
reset_temp(temps, i, nb_temps, nb_globals);
--
1.7.4.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [Qemu-devel] [PATCH v2 3/6] Do constant folding for basic arithmetic operations.
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 3/6] Do constant folding for basic arithmetic operations Kirill Batuzov
@ 2011-06-10 17:57 ` Richard Henderson
0 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-06-10 17:57 UTC (permalink / raw)
To: Kirill Batuzov; +Cc: qemu-devel, zhur
On 06/09/2011 03:45 AM, Kirill Batuzov wrote:
> +static int op_to_mov(int op)
> +{
> + if (op_bits(op) == 32) {
> + return INDEX_op_mov_i32;
> + }
> +#if TCG_TARGET_REG_BITS == 64
> + if (op_bits(op) == 64) {
> + return INDEX_op_mov_i64;
> + }
> +#endif
> + tcg_abort();
> +}
Again, switch not two calls.
> +static TCGArg do_constant_folding(int op, TCGArg x, TCGArg y)
> +{
> + TCGArg res = do_constant_folding_2(op, x, y);
> +#if TCG_TARGET_REG_BITS == 64
> + if (op_bits(op) == 32) {
> + res &= 0xffffffff;
Strictly speaking, this isn't required. The top bits of any
constant are considered garbage to the code generators. C.f.
the code in tcg_out_movi for any 64-bit host.
That said, only x86_64 and s390 get this right for the constraints.
x86_64 by being able to use 'i' to accept all constants for 32-bit
operations, and s390 by using 'W' as a modifier to force 32-bit
comparison in tcg_target_const_match.
So it's probably best to keep this for now.
> + /* Simplify expression if possible. */
> + switch (op) {
> + case INDEX_op_add_i32:
> +#if TCG_TARGET_REG_BITS == 64
> + case INDEX_op_add_i64:
> +#endif
> + if (temps[args[1]].state == TCG_TEMP_CONST) {
> + /* 0 + x == x + 0 */
> + tmp = args[1];
> + args[1] = args[2];
> + args[2] = tmp;
> + }
Probably best to break this out into another switch so that
you can handle all of the commutative operations.
> + /* Fallthrough */
> + case INDEX_op_sub_i32:
> +#if TCG_TARGET_REG_BITS == 64
> + case INDEX_op_sub_i64:
> +#endif
> + if (temps[args[1]].state == TCG_TEMP_CONST) {
> + /* Proceed with possible constant folding. */
> + break;
> + }
> + if (temps[args[2]].state == TCG_TEMP_CONST
> + && temps[args[2]].val == 0) {
> + if ((temps[args[0]].state == TCG_TEMP_COPY
> + && temps[args[0]].val == args[1])
> + || args[0] == args[1]) {
> + args += 3;
> + gen_opc_buf[op_index] = INDEX_op_nop;
> + } else {
> + reset_temp(temps, args[0], nb_temps, nb_globals);
> + if (args[1] >= s->nb_globals) {
> + temps[args[0]].state = TCG_TEMP_COPY;
> + temps[args[0]].val = args[1];
> + temps[args[1]].num_copies++;
> + }
> + gen_opc_buf[op_index] = op_to_mov(op);
> + gen_args[0] = args[0];
> + gen_args[1] = args[1];
> + gen_args += 2;
> + args += 3;
> + }
> + continue;
> + }
> + break;
> + case INDEX_op_mul_i32:
> +#if TCG_TARGET_REG_BITS == 64
> + case INDEX_op_mul_i64:
> +#endif
> + if ((temps[args[1]].state == TCG_TEMP_CONST
> + && temps[args[1]].val == 0)
> + || (temps[args[2]].state == TCG_TEMP_CONST
> + && temps[args[2]].val == 0)) {
> + reset_temp(temps, args[0], nb_temps, nb_globals);
> + temps[args[0]].state = TCG_TEMP_CONST;
> + temps[args[0]].val = 0;
> + assert(temps[args[0]].num_copies == 0);
> + gen_opc_buf[op_index] = op_to_movi(op);
> + gen_args[0] = args[0];
> + gen_args[1] = 0;
> + args += 3;
> + gen_args += 2;
> + continue;
> + }
This is *way* too much code duplication, particularly with the
same code sequences already existing for mov and movi and more
to come with the logicals.
r~
^ permalink raw reply [flat|nested] 13+ messages in thread
* [Qemu-devel] [PATCH v2 4/6] Do constant folding for boolean operations.
2011-06-09 10:45 [Qemu-devel] [PATCH v2 0/6] Implement constant folding and copy propagation in TCG Kirill Batuzov
` (2 preceding siblings ...)
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 3/6] Do constant folding for basic arithmetic operations Kirill Batuzov
@ 2011-06-09 10:45 ` Kirill Batuzov
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 5/6] Do constant folding for shift operations Kirill Batuzov
` (2 subsequent siblings)
6 siblings, 0 replies; 13+ messages in thread
From: Kirill Batuzov @ 2011-06-09 10:45 UTC (permalink / raw)
To: qemu-devel; +Cc: zhur
Perform constant folding for AND, OR, XOR operations.
Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---
tcg/optimize.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 56 insertions(+), 0 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 29da6fa..0bd8c78 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -92,12 +92,18 @@ static int op_bits(int op)
case INDEX_op_add_i32:
case INDEX_op_sub_i32:
case INDEX_op_mul_i32:
+ case INDEX_op_and_i32:
+ case INDEX_op_or_i32:
+ case INDEX_op_xor_i32:
return 32;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
case INDEX_op_add_i64:
case INDEX_op_sub_i64:
case INDEX_op_mul_i64:
+ case INDEX_op_and_i64:
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i64:
return 64;
#endif
default:
@@ -153,6 +159,24 @@ static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
#endif
return x * y;
+ case INDEX_op_and_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_and_i64:
+#endif
+ return x & y;
+
+ case INDEX_op_or_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_or_i64:
+#endif
+ return x | y;
+
+ case INDEX_op_xor_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_xor_i64:
+#endif
+ return x ^ y;
+
default:
fprintf(stderr,
"Unrecognized operation %d in do_constant_folding.\n", op);
@@ -272,6 +296,32 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
continue;
}
break;
+ case INDEX_op_or_i32:
+ case INDEX_op_and_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_and_i64:
+ case INDEX_op_or_i64:
+#endif
+ if (args[1] == args[2]) {
+ if (args[1] == args[0]) {
+ args += 3;
+ gen_opc_buf[op_index] = INDEX_op_nop;
+ } else {
+ reset_temp(temps, args[0], nb_temps, nb_globals);
+ if (args[1] >= s->nb_globals) {
+ temps[args[0]].state = TCG_TEMP_COPY;
+ temps[args[0]].val = args[1];
+ assert(temps[args[0]].num_copies == 0);
+ }
+ gen_opc_buf[op_index] = op_to_mov(op);
+ gen_args[0] = args[0];
+ gen_args[1] = args[1];
+ gen_args += 2;
+ args += 3;
+ }
+ continue;
+ }
+ break;
}
/* Propagate constants through copy operations and do constant
@@ -321,10 +371,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_args += 2;
args += 2;
break;
+ case INDEX_op_or_i32:
+ case INDEX_op_and_i32:
+ case INDEX_op_xor_i32:
case INDEX_op_add_i32:
case INDEX_op_sub_i32:
case INDEX_op_mul_i32:
#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_and_i64:
+ case INDEX_op_or_i64:
+ case INDEX_op_xor_i64:
case INDEX_op_add_i64:
case INDEX_op_sub_i64:
case INDEX_op_mul_i64:
--
1.7.4.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* [Qemu-devel] [PATCH v2 5/6] Do constant folding for shift operations.
2011-06-09 10:45 [Qemu-devel] [PATCH v2 0/6] Implement constant folding and copy propagation in TCG Kirill Batuzov
` (3 preceding siblings ...)
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 4/6] Do constant folding for boolean operations Kirill Batuzov
@ 2011-06-09 10:45 ` Kirill Batuzov
2011-06-10 18:02 ` Richard Henderson
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 6/6] Do constant folding for unary operations Kirill Batuzov
2011-06-10 18:08 ` [Qemu-devel] [PATCH v2 0/6] Implement constant folding and copy propagation in TCG Richard Henderson
6 siblings, 1 reply; 13+ messages in thread
From: Kirill Batuzov @ 2011-06-09 10:45 UTC (permalink / raw)
To: qemu-devel; +Cc: zhur
Perform constant forlding for SHR, SHL, SAR, ROTR, ROTL operations.
Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---
tcg/optimize.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 86 insertions(+), 0 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 0bd8c78..653f399 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -95,6 +95,11 @@ static int op_bits(int op)
case INDEX_op_and_i32:
case INDEX_op_or_i32:
case INDEX_op_xor_i32:
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotr_i32:
return 32;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
@@ -104,6 +109,11 @@ static int op_bits(int op)
case INDEX_op_and_i64:
case INDEX_op_or_i64:
case INDEX_op_xor_i64:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i64:
return 64;
#endif
default:
@@ -177,6 +187,62 @@ static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
#endif
return x ^ y;
+ case INDEX_op_shl_i32:
+#if TCG_TARGET_REG_BITS == 64
+ y &= 0xffffffff;
+ case INDEX_op_shl_i64:
+#endif
+ return x << y;
+
+ case INDEX_op_shr_i32:
+#if TCG_TARGET_REG_BITS == 64
+ x &= 0xffffffff;
+ y &= 0xffffffff;
+ case INDEX_op_shr_i64:
+#endif
+ /* Assuming TCGArg to be unsigned */
+ return x >> y;
+
+ case INDEX_op_sar_i32:
+#if TCG_TARGET_REG_BITS == 64
+ x &= 0xffffffff;
+ y &= 0xffffffff;
+#endif
+ return (int32_t)x >> (int32_t)y;
+
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_sar_i64:
+ return (int64_t)x >> (int64_t)y;
+#endif
+
+ case INDEX_op_rotr_i32:
+#if TCG_TARGET_REG_BITS == 64
+ x &= 0xffffffff;
+ y &= 0xffffffff;
+#endif
+ x = (x << (32 - y)) | (x >> y);
+ return x;
+
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_rotr_i64:
+ x = (x << (64 - y)) | (x >> y);
+ return x;
+#endif
+
+ case INDEX_op_rotl_i32:
+#if TCG_TARGET_REG_BITS == 64
+ x &= 0xffffffff;
+ y &= 0xffffffff;
+#endif
+ x = (x << y) | (x >> (32 - y));
+ return x;
+
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_rotl_i64:
+ x = (x << y) | (x >> (64 - y));
+ return x;
+#endif
+
default:
fprintf(stderr,
"Unrecognized operation %d in do_constant_folding.\n", op);
@@ -246,8 +312,18 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
}
/* Fallthrough */
case INDEX_op_sub_i32:
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotr_i32:
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_sub_i64:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i64:
#endif
if (temps[args[1]].state == TCG_TEMP_CONST) {
/* Proceed with possible constant folding. */
@@ -377,6 +453,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
case INDEX_op_add_i32:
case INDEX_op_sub_i32:
case INDEX_op_mul_i32:
+ case INDEX_op_shl_i32:
+ case INDEX_op_shr_i32:
+ case INDEX_op_sar_i32:
+ case INDEX_op_rotl_i32:
+ case INDEX_op_rotr_i32:
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_and_i64:
case INDEX_op_or_i64:
@@ -384,6 +465,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
case INDEX_op_add_i64:
case INDEX_op_sub_i64:
case INDEX_op_mul_i64:
+ case INDEX_op_shl_i64:
+ case INDEX_op_shr_i64:
+ case INDEX_op_sar_i64:
+ case INDEX_op_rotl_i64:
+ case INDEX_op_rotr_i64:
#endif
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST) {
--
1.7.4.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [Qemu-devel] [PATCH v2 5/6] Do constant folding for shift operations.
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 5/6] Do constant folding for shift operations Kirill Batuzov
@ 2011-06-10 18:02 ` Richard Henderson
0 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-06-10 18:02 UTC (permalink / raw)
To: Kirill Batuzov; +Cc: qemu-devel, zhur
On 06/09/2011 03:45 AM, Kirill Batuzov wrote:
> + case INDEX_op_shl_i32:
> +#if TCG_TARGET_REG_BITS == 64
> + y &= 0xffffffff;
> + case INDEX_op_shl_i64:
> +#endif
> + return x << y;
> +
> + case INDEX_op_shr_i32:
> +#if TCG_TARGET_REG_BITS == 64
> + x &= 0xffffffff;
> + y &= 0xffffffff;
> + case INDEX_op_shr_i64:
> +#endif
> + /* Assuming TCGArg to be unsigned */
> + return x >> y;
Don't assume when you've got a uint64_t type readily available.
> + case INDEX_op_sar_i32:
> +#if TCG_TARGET_REG_BITS == 64
> + x &= 0xffffffff;
> + y &= 0xffffffff;
> +#endif
> + return (int32_t)x >> (int32_t)y;
Masks are redundant with the casts.
> + case INDEX_op_rotr_i32:
> +#if TCG_TARGET_REG_BITS == 64
> + x &= 0xffffffff;
> + y &= 0xffffffff;
> +#endif
> + x = (x << (32 - y)) | (x >> y);
Have you looked to see if this gets recognized as a rotate
by the compiler? I suspect that it will if you use a cast
to uint32_t here, but not if it is left as a 64-bit TCGArg.
> +#if TCG_TARGET_REG_BITS == 64
> + case INDEX_op_rotl_i64:
> + x = (x << y) | (x >> (64 - y));
> + return x;
> +#endif
Likewise it's probably best to cast to uint64_t here.
r~
^ permalink raw reply [flat|nested] 13+ messages in thread
* [Qemu-devel] [PATCH v2 6/6] Do constant folding for unary operations.
2011-06-09 10:45 [Qemu-devel] [PATCH v2 0/6] Implement constant folding and copy propagation in TCG Kirill Batuzov
` (4 preceding siblings ...)
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 5/6] Do constant folding for shift operations Kirill Batuzov
@ 2011-06-09 10:45 ` Kirill Batuzov
2011-06-10 18:04 ` Richard Henderson
2011-06-10 18:08 ` [Qemu-devel] [PATCH v2 0/6] Implement constant folding and copy propagation in TCG Richard Henderson
6 siblings, 1 reply; 13+ messages in thread
From: Kirill Batuzov @ 2011-06-09 10:45 UTC (permalink / raw)
To: qemu-devel; +Cc: zhur
Perform constant folding for NOT and EXT{8,16,32}{S,U} operations.
Signed-off-by: Kirill Batuzov <batuzovk@ispras.ru>
---
tcg/optimize.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 83 insertions(+), 0 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 653f399..2cdcc29 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -100,6 +100,11 @@ static int op_bits(int op)
case INDEX_op_sar_i32:
case INDEX_op_rotl_i32:
case INDEX_op_rotr_i32:
+ case INDEX_op_not_i32:
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext16u_i32:
return 32;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
@@ -114,6 +119,13 @@ static int op_bits(int op)
case INDEX_op_sar_i64:
case INDEX_op_rotl_i64:
case INDEX_op_rotr_i64:
+ case INDEX_op_not_i64:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext8u_i64:
+ case INDEX_op_ext16u_i64:
+ case INDEX_op_ext32u_i64:
return 64;
#endif
default:
@@ -243,6 +255,44 @@ static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
return x;
#endif
+ case INDEX_op_not_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_not_i64:
+#endif
+ return ~x;
+
+ case INDEX_op_ext8s_i32:
+ return (int32_t)(int8_t)x;
+
+ case INDEX_op_ext16s_i32:
+ return (int32_t)(int16_t)x;
+
+ case INDEX_op_ext8u_i32:
+ return (uint32_t)(uint8_t)x;
+
+ case INDEX_op_ext16u_i32:
+ return (uint32_t)(uint16_t)x;
+
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_ext8s_i64:
+ return (int64_t)(int8_t)x;
+
+ case INDEX_op_ext16s_i64:
+ return (int64_t)(int16_t)x;
+
+ case INDEX_op_ext32s_i64:
+ return (int64_t)(int32_t)x;
+
+ case INDEX_op_ext8u_i64:
+ return (uint64_t)(uint8_t)x;
+
+ case INDEX_op_ext16u_i64:
+ return (uint64_t)(uint16_t)x;
+
+ case INDEX_op_ext32u_i64:
+ return (uint64_t)(uint32_t)x;
+#endif
+
default:
fprintf(stderr,
"Unrecognized operation %d in do_constant_folding.\n", op);
@@ -447,6 +497,39 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
gen_args += 2;
args += 2;
break;
+ case INDEX_op_not_i32:
+ case INDEX_op_ext8s_i32:
+ case INDEX_op_ext16s_i32:
+ case INDEX_op_ext8u_i32:
+ case INDEX_op_ext16u_i32:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_not_i64:
+ case INDEX_op_ext8s_i64:
+ case INDEX_op_ext16s_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext8u_i64:
+ case INDEX_op_ext16u_i64:
+ case INDEX_op_ext32u_i64:
+#endif
+ if (temps[args[1]].state == TCG_TEMP_CONST) {
+ gen_opc_buf[op_index] = op_to_movi(op);
+ gen_args[0] = args[0];
+ gen_args[1] = do_constant_folding(op, temps[args[1]].val, 0);
+ reset_temp(temps, gen_args[0], nb_temps, nb_globals);
+ temps[gen_args[0]].state = TCG_TEMP_CONST;
+ temps[gen_args[0]].val = gen_args[1];
+ assert(temps[gen_args[0]].num_copies == 0);
+ gen_args += 2;
+ args += 2;
+ break;
+ } else {
+ reset_temp(temps, args[0], nb_temps, nb_globals);
+ gen_args[0] = args[0];
+ gen_args[1] = args[1];
+ gen_args += 2;
+ args += 2;
+ break;
+ }
case INDEX_op_or_i32:
case INDEX_op_and_i32:
case INDEX_op_xor_i32:
--
1.7.4.1
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [Qemu-devel] [PATCH v2 0/6] Implement constant folding and copy propagation in TCG
2011-06-09 10:45 [Qemu-devel] [PATCH v2 0/6] Implement constant folding and copy propagation in TCG Kirill Batuzov
` (5 preceding siblings ...)
2011-06-09 10:45 ` [Qemu-devel] [PATCH v2 6/6] Do constant folding for unary operations Kirill Batuzov
@ 2011-06-10 18:08 ` Richard Henderson
6 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2011-06-10 18:08 UTC (permalink / raw)
To: Kirill Batuzov; +Cc: qemu-devel, zhur
On 06/09/2011 03:45 AM, Kirill Batuzov wrote:
> Changes:
> v1 -> v2
> - State and Vals arrays merged to an array of structures.
> - Added reference counting of temp's copies. This helps to reset temp's state
> faster in most cases.
> - Do not make copy propagation through operations with TCG_OPF_CALL_CLOBBER or
> TCG_OPF_SIDE_EFFECTS flag.
> - Split some expression simplifications into independent switch.
> - Let compiler handle signed shifts and sign/zero extends in it's
> implementation defined way.
>
> Kirill Batuzov (6):
> Add TCG optimizations stub
> Add copy and constant propagation.
> Do constant folding for basic arithmetic operations.
> Do constant folding for boolean operations.
> Do constant folding for shift operations.
> Do constant folding for unary operations.
>
> Makefile.target | 2 +-
> tcg/optimize.c | 633 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> tcg/tcg.c | 6 +
> tcg/tcg.h | 3 +
> 4 files changed, 643 insertions(+), 1 deletions(-)
> create mode 100644 tcg/optimize.c
>
FWIW, a patch that implements most of the suggestions that I made
to the indivual patches in this thread, including a major restructure
of the body of the optimization to avoid code duplication.
I havn't bothered to break it up for now, but at least you can see
what I was talking about.
r~
---
>From 2b49e328d3d2461f97f0b6802e0c8e88e0165b1c Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@anchor.twiddle.net>
Date: Fri, 10 Jun 2011 10:08:17 -0700
Subject: [PATCH] Re-org tcg optimizer.
1: Put equivalence class members in a double-linked list.
2: Use glue to handle 32+64-bit opcodes at the same time.
3: Tidy duplicate code sequences inside tcg_constant_folding.
---
tcg/optimize.c | 822 +++++++++++++++++++++++++++++---------------------------
1 files changed, 419 insertions(+), 403 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 2cdcc29..9768043 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -32,63 +32,124 @@
#include "tcg-op.h"
typedef enum {
- TCG_TEMP_UNDEF = 0,
+ TCG_TEMP_VAR = 0,
TCG_TEMP_CONST,
TCG_TEMP_COPY,
- TCG_TEMP_ANY
} tcg_temp_state;
struct tcg_temp_info {
tcg_temp_state state;
- uint16_t num_copies;
+ uint16_t prev_copy;
+ uint16_t next_copy;
tcg_target_ulong val;
};
-/* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
- class of equivalent temp's, a new representative should be chosen in this
- class. */
-static void reset_temp(struct tcg_temp_info *temps, TCGArg temp, int nb_temps,
- int nb_globals)
+/* Array TEMPS has an element for each temp.
+ If this temp holds a constant then its value is kept in .VAL.
+ If this temp is a copy of other ones then this equivalence class'
+ representative is kept in .VAL. */
+struct tcg_temp_info temps[TCG_MAX_TEMPS];
+
+/* Avoid some of the rampant if-deffery related to 32 vs 64-bit operations. */
+#if TCG_TARGET_REG_BITS == 64
+#define CASE_OP_32_64(x) \
+ glue(glue(case INDEX_op_, x), _i32): \
+ glue(glue(case INDEX_op_, x), _i64)
+#else
+#define CASE_OP_32_64(x) \
+ glue(glue(case INDEX_op_, x), _i32)
+#endif
+
+
+/* Reset all temporaries to TCG_TEMP_VAR. */
+static void reset_all_temps(int nb_temps)
{
int i;
- TCGArg new_base;
- if (temps[temp].state == TCG_TEMP_COPY) {
- temps[temps[temp].val].num_copies--;
+ for (i = 0; i < nb_temps; ++i) {
+ temps[i].state = TCG_TEMP_VAR;
+ temps[i].prev_copy = i;
+ temps[i].next_copy = i;
}
- if (temps[temp].num_copies > 0) {
- new_base = (TCGArg)-1;
- for (i = nb_globals; i < nb_temps; i++) {
- if (temps[i].state == TCG_TEMP_COPY && temps[i].val == temp) {
- if (new_base == ((TCGArg)-1)) {
- new_base = (TCGArg)i;
- temps[i].state = TCG_TEMP_ANY;
- temps[i].num_copies = 0;
- } else {
- temps[i].val = new_base;
- temps[new_base].num_copies++;
- }
+}
+
+/* Reset T's state to TCG_TEMP_VAR. If T was a representative of some
+ class of equivalent temp's, a new representative should be chosen
+ in this class. */
+static void reset_temp(int t)
+{
+ int prev = temps[t].prev_copy;
+ int next = temps[t].next_copy;
+
+ switch (temps[t].state) {
+ case TCG_TEMP_VAR:
+ /* If next (and prev) equals temp, that means there are no copies.
+ Otherwise, promote NEXT to be the new representative of the
+ equivalence class. */
+ if (next != t) {
+ int i;
+ temps[next].state = TCG_TEMP_VAR;
+ for (i = temps[next].next_copy; i != t; i = temps[i].next_copy) {
+ temps[i].val = next;
}
}
- for (i = 0; i < nb_globals; i++) {
- if (temps[i].state == TCG_TEMP_COPY && temps[i].val == temp) {
- if (new_base == ((TCGArg)-1)) {
- temps[i].state = TCG_TEMP_ANY;
- temps[i].num_copies = 0;
- } else {
- temps[i].val = new_base;
- temps[new_base].num_copies++;
- }
- }
+ /* FALLTHRU */
+
+ case TCG_TEMP_COPY:
+ /* Unlink T from the equivalence class. */
+ temps[prev].next_copy = next;
+ temps[next].prev_copy = prev;
+ break;
+
+ case TCG_TEMP_CONST:
+ break;
+
+ default:
+ tcg_abort();
+ }
+
+ temps[t].state = TCG_TEMP_VAR;
+ temps[t].prev_copy = t;
+ temps[t].next_copy = t;
+}
+
+static void reset_temps_bb(TCGContext *s)
+{
+ int i, nb_temps = s->nb_temps;
+
+ for (i = s->nb_globals; i < nb_temps; ++i) {
+ if (!s->temps[i].temp_local) {
+ reset_temp(i);
}
}
- temps[temp].state = TCG_TEMP_ANY;
- temps[temp].num_copies = 0;
}
-static int op_bits(int op)
+static void make_copy(int dest, int src, int nb_globals)
+{
+ int prev, next, head = src;
+
+ if (temps[head].state == TCG_TEMP_COPY) {
+ head = temps[head].val;
+ }
+
+ temps[dest].state = TCG_TEMP_COPY;
+ temps[dest].val = head;
+
+ /* Insert at the tail of the list. This will tend to choose
+ the next-oldest copy when HEAD gets flushed. */
+ prev = temps[head].prev_copy;
+ next = head;
+
+ temps[prev].next_copy = dest;
+ temps[dest].prev_copy = prev;
+ temps[dest].next_copy = next;
+ temps[next].prev_copy = dest;
+}
+
+static int op_bits(TCGOpcode op)
{
switch (op) {
case INDEX_op_mov_i32:
+ case INDEX_op_movi_i32:
case INDEX_op_add_i32:
case INDEX_op_sub_i32:
case INDEX_op_mul_i32:
@@ -101,6 +162,7 @@ static int op_bits(int op)
case INDEX_op_rotl_i32:
case INDEX_op_rotr_i32:
case INDEX_op_not_i32:
+ case INDEX_op_neg_i32:
case INDEX_op_ext8s_i32:
case INDEX_op_ext16s_i32:
case INDEX_op_ext8u_i32:
@@ -108,6 +170,7 @@ static int op_bits(int op)
return 32;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
+ case INDEX_op_movi_i64:
case INDEX_op_add_i64:
case INDEX_op_sub_i64:
case INDEX_op_mul_i64:
@@ -120,6 +183,7 @@ static int op_bits(int op)
case INDEX_op_rotl_i64:
case INDEX_op_rotr_i64:
case INDEX_op_not_i64:
+ case INDEX_op_neg_i64:
case INDEX_op_ext8s_i64:
case INDEX_op_ext16s_i64:
case INDEX_op_ext32s_i64:
@@ -134,163 +198,105 @@ static int op_bits(int op)
}
}
-static int op_to_movi(int op)
+static int op_to_movi(TCGOpcode op)
{
- if (op_bits(op) == 32) {
+ switch (op_bits(op)) {
+ case 32:
return INDEX_op_movi_i32;
- }
#if TCG_TARGET_REG_BITS == 64
- if (op_bits(op) == 64) {
+ case 64:
return INDEX_op_movi_i64;
- }
#endif
+ }
tcg_abort();
}
-static int op_to_mov(int op)
+static int op_to_mov(TCGOpcode op)
{
- if (op_bits(op) == 32) {
+ switch (op_bits(op)) {
+ case 32:
return INDEX_op_mov_i32;
- }
#if TCG_TARGET_REG_BITS == 64
- if (op_bits(op) == 64) {
+ case 64:
return INDEX_op_mov_i64;
- }
#endif
+ }
tcg_abort();
}
-static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
+static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
switch (op) {
- case INDEX_op_add_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_add_i64:
-#endif
+ CASE_OP_32_64(add):
return x + y;
-
- case INDEX_op_sub_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_sub_i64:
-#endif
+ CASE_OP_32_64(sub):
return x - y;
-
- case INDEX_op_mul_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_mul_i64:
-#endif
+ CASE_OP_32_64(mul):
return x * y;
-
- case INDEX_op_and_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_and_i64:
-#endif
+ CASE_OP_32_64(and):
return x & y;
-
- case INDEX_op_or_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_or_i64:
-#endif
+ CASE_OP_32_64(or):
return x | y;
-
- case INDEX_op_xor_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_xor_i64:
-#endif
+ CASE_OP_32_64(xor):
return x ^ y;
- case INDEX_op_shl_i32:
-#if TCG_TARGET_REG_BITS == 64
- y &= 0xffffffff;
- case INDEX_op_shl_i64:
-#endif
- return x << y;
+ CASE_OP_32_64(neg):
+ return -x;
+ CASE_OP_32_64(not):
+ return ~x;
+
+ CASE_OP_32_64(shl):
+ return x << (y & (op_bits(op) - 1));
case INDEX_op_shr_i32:
+ return (uint32_t)x >> (y & 31);
#if TCG_TARGET_REG_BITS == 64
- x &= 0xffffffff;
- y &= 0xffffffff;
case INDEX_op_shr_i64:
+ return (uint64_t)x >> (y & 63);
#endif
- /* Assuming TCGArg to be unsigned */
- return x >> y;
case INDEX_op_sar_i32:
-#if TCG_TARGET_REG_BITS == 64
- x &= 0xffffffff;
- y &= 0xffffffff;
-#endif
- return (int32_t)x >> (int32_t)y;
-
+ return (int32_t)x >> (y & 31);
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_sar_i64:
- return (int64_t)x >> (int64_t)y;
+ return (int64_t)x >> (y & 63);
#endif
case INDEX_op_rotr_i32:
-#if TCG_TARGET_REG_BITS == 64
- x &= 0xffffffff;
- y &= 0xffffffff;
-#endif
- x = (x << (32 - y)) | (x >> y);
+ y &= 31;
+ x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y);
return x;
-
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_rotr_i64:
- x = (x << (64 - y)) | (x >> y);
+ y &= 63;
+ x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y);
return x;
#endif
case INDEX_op_rotl_i32:
-#if TCG_TARGET_REG_BITS == 64
- x &= 0xffffffff;
- y &= 0xffffffff;
-#endif
- x = (x << y) | (x >> (32 - y));
+ y &= 31;
+ x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y));
return x;
-
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_rotl_i64:
- x = (x << y) | (x >> (64 - y));
+ y &= 63;
+ x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y));
return x;
#endif
- case INDEX_op_not_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_not_i64:
-#endif
- return ~x;
-
- case INDEX_op_ext8s_i32:
- return (int32_t)(int8_t)x;
-
- case INDEX_op_ext16s_i32:
- return (int32_t)(int16_t)x;
-
- case INDEX_op_ext8u_i32:
- return (uint32_t)(uint8_t)x;
-
- case INDEX_op_ext16u_i32:
- return (uint32_t)(uint16_t)x;
-
+ CASE_OP_32_64(ext8s):
+ return (int8_t)x;
+ CASE_OP_32_64(ext8u):
+ return (uint8_t)x;
+ CASE_OP_32_64(ext16s):
+ return (int16_t)x;
+ CASE_OP_32_64(ext16u):
+ return (uint16_t)x;
#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_ext8s_i64:
- return (int64_t)(int8_t)x;
-
- case INDEX_op_ext16s_i64:
- return (int64_t)(int16_t)x;
-
case INDEX_op_ext32s_i64:
- return (int64_t)(int32_t)x;
-
- case INDEX_op_ext8u_i64:
- return (uint64_t)(uint8_t)x;
-
- case INDEX_op_ext16u_i64:
- return (uint64_t)(uint16_t)x;
-
+ return (int32_t)x;
case INDEX_op_ext32u_i64:
- return (uint64_t)(uint32_t)x;
+ return (uint32_t)x;
#endif
default:
@@ -300,7 +306,7 @@ static TCGArg do_constant_folding_2(int op, TCGArg x, TCGArg y)
}
}
-static TCGArg do_constant_folding(int op, TCGArg x, TCGArg y)
+static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
TCGArg res = do_constant_folding_2(op, x, y);
#if TCG_TARGET_REG_BITS == 64
@@ -315,310 +321,320 @@ static TCGArg do_constant_folding(int op, TCGArg x, TCGArg y)
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
TCGArg *args, TCGOpDef *tcg_op_defs)
{
- int i, nb_ops, op_index, op, nb_temps, nb_globals;
- const TCGOpDef *def;
+ int i, nb_ops, op_index, nb_temps, nb_globals;
TCGArg *gen_args;
- TCGArg tmp;
- /* Array VALS has an element for each temp.
- If this temp holds a constant then its value is kept in VALS' element.
- If this temp is a copy of other ones then this equivalence class'
- representative is kept in VALS' element.
- If this temp is neither copy nor constant then corresponding VALS'
- element is unused. */
- struct tcg_temp_info temps[TCG_MAX_TEMPS];
nb_temps = s->nb_temps;
nb_globals = s->nb_globals;
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ reset_all_temps(nb_temps);
nb_ops = tcg_opc_ptr - gen_opc_buf;
gen_args = args;
for (op_index = 0; op_index < nb_ops; op_index++) {
- op = gen_opc_buf[op_index];
- def = &tcg_op_defs[op];
- /* Do copy propagation */
- if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
- assert(op != INDEX_op_call);
- for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
- if (temps[args[i]].state == TCG_TEMP_COPY
- && !(def->args_ct[i].ct & TCG_CT_IALIAS)
- && (def->args_ct[i].ct & TCG_CT_REG)) {
- args[i] = temps[args[i]].val;
+ TCGOpcode op = gen_opc_buf[op_index];
+ const TCGOpDef *def = &tcg_op_defs[op];
+ TCGArg arg0, arg1, arg2, tmp;
+
+ /* The format of CALL operations is special. */
+ if (op == INDEX_op_call) {
+ int n_in, n_out, flags;
+
+ n_in = *gen_args++ = *args++; /* in/out argument */
+ n_out = n_in >> 16;
+ n_in &= 0xffff;
+
+ /* Both pure and const functions do not modify globals. */
+ flags = args[n_out + n_in];
+ if ((flags & (TCG_CALL_PURE | TCG_CALL_CONST)) == 0) {
+ for (i = 0; i < nb_globals; i++) {
+ reset_temp(i);
}
}
- }
- /* Simplify expression if possible. */
- switch (op) {
- case INDEX_op_add_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_add_i64:
-#endif
- if (temps[args[1]].state == TCG_TEMP_CONST) {
- /* 0 + x == x + 0 */
- tmp = args[1];
- args[1] = args[2];
- args[2] = tmp;
+ /* All outputs are now variable. */
+ for (i = 0; i < n_out; i++) {
+ tmp = *args++;
+ *gen_args++ = tmp;
+ reset_temp(tmp);
}
- /* Fallthrough */
- case INDEX_op_sub_i32:
- case INDEX_op_shl_i32:
- case INDEX_op_shr_i32:
- case INDEX_op_sar_i32:
- case INDEX_op_rotl_i32:
- case INDEX_op_rotr_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_sub_i64:
- case INDEX_op_shl_i64:
- case INDEX_op_shr_i64:
- case INDEX_op_sar_i64:
- case INDEX_op_rotl_i64:
- case INDEX_op_rotr_i64:
-#endif
- if (temps[args[1]].state == TCG_TEMP_CONST) {
- /* Proceed with possible constant folding. */
- break;
- }
- if (temps[args[2]].state == TCG_TEMP_CONST
- && temps[args[2]].val == 0) {
- if ((temps[args[0]].state == TCG_TEMP_COPY
- && temps[args[0]].val == args[1])
- || args[0] == args[1]) {
- args += 3;
- gen_opc_buf[op_index] = INDEX_op_nop;
- } else {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- if (args[1] >= s->nb_globals) {
- temps[args[0]].state = TCG_TEMP_COPY;
- temps[args[0]].val = args[1];
- temps[args[1]].num_copies++;
- }
- gen_opc_buf[op_index] = op_to_mov(op);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args += 2;
- args += 3;
+
+ /* Copy propagate into input arguments. */
+ for (i = 0; i < n_in; i++) {
+ tmp = *args++;
+ if (temps[tmp].state == TCG_TEMP_COPY) {
+ tmp = temps[tmp].val;
}
- continue;
+ *gen_args++ = tmp;
}
- break;
- case INDEX_op_mul_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_mul_i64:
-#endif
- if ((temps[args[1]].state == TCG_TEMP_CONST
- && temps[args[1]].val == 0)
- || (temps[args[2]].state == TCG_TEMP_CONST
- && temps[args[2]].val == 0)) {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- temps[args[0]].state = TCG_TEMP_CONST;
- temps[args[0]].val = 0;
- assert(temps[args[0]].num_copies == 0);
- gen_opc_buf[op_index] = op_to_movi(op);
- gen_args[0] = args[0];
- gen_args[1] = 0;
- args += 3;
- gen_args += 2;
- continue;
+
+ for (i = def->nb_cargs; i > 0; --i) {
+ *gen_args++ = *args++;
}
- break;
- case INDEX_op_or_i32:
- case INDEX_op_and_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_and_i64:
- case INDEX_op_or_i64:
-#endif
- if (args[1] == args[2]) {
- if (args[1] == args[0]) {
- args += 3;
- gen_opc_buf[op_index] = INDEX_op_nop;
- } else {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- if (args[1] >= s->nb_globals) {
- temps[args[0]].state = TCG_TEMP_COPY;
- temps[args[0]].val = args[1];
- assert(temps[args[0]].num_copies == 0);
- }
- gen_opc_buf[op_index] = op_to_mov(op);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args += 2;
- args += 3;
- }
- continue;
+ continue;
+ }
+
+ /* Do copy propagation. */
+ for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
+ tmp = args[i];
+ if (temps[tmp].state == TCG_TEMP_COPY
+ && (def->args_ct[i].ct & TCG_CT_REG)) {
+ args[i] = temps[tmp].val;
}
- break;
}
- /* Propagate constants through copy operations and do constant
- folding. Constants will be substituted to arguments by register
- allocator where needed and possible. Also detect copies. */
+ /* Most everything below operates on unary or binary ops. We
+ can encourage better code by pulling the arguments into
+ registers now. At the same time, filter out ops that we
+ don't handle at all. */
+ arg2 = 0;
switch (op) {
- case INDEX_op_mov_i32:
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ arg2 = args[2];
+ /* FALLTHRU */
+
+ CASE_OP_32_64(mov):
+ CASE_OP_32_64(movi):
+ CASE_OP_32_64(not):
+ CASE_OP_32_64(neg):
+ CASE_OP_32_64(ext8s):
+ CASE_OP_32_64(ext8u):
+ CASE_OP_32_64(ext16s):
+ CASE_OP_32_64(ext16u):
#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_mov_i64:
+ case INDEX_op_ext32s_i64:
+ case INDEX_op_ext32u_i64:
#endif
- if ((temps[args[1]].state == TCG_TEMP_COPY
- && temps[args[1]].val == args[0])
- || args[0] == args[1]) {
- args += 2;
- gen_opc_buf[op_index] = INDEX_op_nop;
- break;
+ arg0 = args[0];
+ arg1 = args[1];
+ break;
+
+ case INDEX_op_set_label:
+ /* Flush info at the beginning of an extended basic block. */
+ reset_all_temps(nb_temps);
+ *gen_args++ = *args++;
+ continue;
+
+ default:
+ /* At the end of a basic block (but not extended bb)
+ we must flush all bb temporaries. */
+ if (def->flags & TCG_OPF_BB_END) {
+ reset_temps_bb(s);
}
- if (temps[args[1]].state != TCG_TEMP_CONST) {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- if (args[1] >= s->nb_globals) {
- temps[args[0]].state = TCG_TEMP_COPY;
- temps[args[0]].val = args[1];
- temps[args[1]].num_copies++;
- }
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args += 2;
- args += 2;
- break;
+
+ /* An unhandled opcode. Outputs are variable. */
+ for (i = 0; i < def->nb_oargs; i++) {
+ reset_temp(args[i]);
+ }
+ for (i = def->nb_args; i > 0; --i) {
+ *gen_args++ = *args++;
+ }
+ continue;
+ }
+
+ /* Do full constant folding. */
+ switch (op) {
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ if (temps[arg1].state == TCG_TEMP_CONST
+ && temps[arg2].state == TCG_TEMP_CONST) {
+ arg1 = do_constant_folding(op, temps[arg1].val,
+ temps[arg2].val);
+ gen_opc_buf[op_index] = op_to_movi(op);
+ goto do_const_prop;
}
- /* Source argument is constant. Rewrite the operation and
- let movi case handle it. */
- op = op_to_movi(op);
- gen_opc_buf[op_index] = op;
- args[1] = temps[args[1]].val;
- /* fallthrough */
- case INDEX_op_movi_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_movi_i64:
-#endif
- reset_temp(temps, args[0], nb_temps, nb_globals);
- temps[args[0]].state = TCG_TEMP_CONST;
- temps[args[0]].val = args[1];
- assert(temps[args[0]].num_copies == 0);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args += 2;
- args += 2;
break;
- case INDEX_op_not_i32:
- case INDEX_op_ext8s_i32:
- case INDEX_op_ext16s_i32:
- case INDEX_op_ext8u_i32:
- case INDEX_op_ext16u_i32:
+
+ CASE_OP_32_64(not):
+ CASE_OP_32_64(neg):
+ CASE_OP_32_64(ext8s):
+ CASE_OP_32_64(ext8u):
+ CASE_OP_32_64(ext16s):
+ CASE_OP_32_64(ext16u):
#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_not_i64:
- case INDEX_op_ext8s_i64:
- case INDEX_op_ext16s_i64:
case INDEX_op_ext32s_i64:
- case INDEX_op_ext8u_i64:
- case INDEX_op_ext16u_i64:
case INDEX_op_ext32u_i64:
#endif
- if (temps[args[1]].state == TCG_TEMP_CONST) {
+ if (temps[arg1].state == TCG_TEMP_CONST) {
+ arg1 = do_constant_folding(op, temps[arg1].val, 0);
gen_opc_buf[op_index] = op_to_movi(op);
- gen_args[0] = args[0];
- gen_args[1] = do_constant_folding(op, temps[args[1]].val, 0);
- reset_temp(temps, gen_args[0], nb_temps, nb_globals);
- temps[gen_args[0]].state = TCG_TEMP_CONST;
- temps[gen_args[0]].val = gen_args[1];
- assert(temps[gen_args[0]].num_copies == 0);
- gen_args += 2;
- args += 2;
- break;
- } else {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args += 2;
- args += 2;
- break;
+ goto do_const_prop;
}
- case INDEX_op_or_i32:
- case INDEX_op_and_i32:
- case INDEX_op_xor_i32:
- case INDEX_op_add_i32:
- case INDEX_op_sub_i32:
- case INDEX_op_mul_i32:
- case INDEX_op_shl_i32:
- case INDEX_op_shr_i32:
- case INDEX_op_sar_i32:
- case INDEX_op_rotl_i32:
- case INDEX_op_rotr_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_and_i64:
- case INDEX_op_or_i64:
- case INDEX_op_xor_i64:
- case INDEX_op_add_i64:
- case INDEX_op_sub_i64:
- case INDEX_op_mul_i64:
- case INDEX_op_shl_i64:
- case INDEX_op_shr_i64:
- case INDEX_op_sar_i64:
- case INDEX_op_rotl_i64:
- case INDEX_op_rotr_i64:
-#endif
- if (temps[args[1]].state == TCG_TEMP_CONST
- && temps[args[2]].state == TCG_TEMP_CONST) {
- gen_opc_buf[op_index] = op_to_movi(op);
- gen_args[0] = args[0];
- gen_args[1] =
- do_constant_folding(op, temps[args[1]].val,
- temps[args[2]].val);
- reset_temp(temps, gen_args[0], nb_temps, nb_globals);
- temps[gen_args[0]].state = TCG_TEMP_CONST;
- temps[gen_args[0]].val = gen_args[1];
- assert(temps[gen_args[0]].num_copies == 0);
- gen_args += 2;
- args += 3;
- break;
- } else {
- reset_temp(temps, args[0], nb_temps, nb_globals);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args[2] = args[2];
- gen_args += 3;
- args += 3;
- break;
+ break;
+
+ default:
+ break;
+ }
+
+ /* For commutative operations, put the constant second.
+ Also swap operands to help matching operands. */
+ switch (op) {
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ if (temps[arg1].state == TCG_TEMP_CONST || arg0 == arg2) {
+ tmp = arg1;
+ arg1 = arg2;
+ arg2 = tmp;
}
- case INDEX_op_call:
- for (i = 0; i < nb_globals; i++) {
- reset_temp(temps, i, nb_temps, nb_globals);
+ break;
+
+ default:
+ break;
+ }
+
+ /* Simplify expressions involving one constant. */
+ switch (op) {
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ if (temps[arg2].state == TCG_TEMP_CONST && temps[arg2].val == 0) {
+ goto do_copy_const_prop;
}
- for (i = 0; i < (args[0] >> 16); i++) {
- reset_temp(temps, args[i + 1], nb_temps, nb_globals);
+ break;
+
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ if (temps[arg2].state == TCG_TEMP_CONST && temps[arg2].val == 0) {
+ arg1 = 0;
+ goto do_const_prop;
}
- i = (args[0] >> 16) + (args[0] & 0xffff) + 3;
- while (i) {
- *gen_args = *args;
- args++;
- gen_args++;
- i--;
+ break;
+
+ default:
+ break;
+ }
+
+ /* Simplify expressions involving no constants. */
+ switch (op) {
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ if (arg1 == arg2) {
+ goto do_copy_const_prop;
}
break;
- case INDEX_op_set_label:
- case INDEX_op_jmp:
- case INDEX_op_br:
- case INDEX_op_brcond_i32:
-#if TCG_TARGET_REG_BITS == 64
- case INDEX_op_brcond_i64:
-#endif
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
- for (i = 0; i < def->nb_args; i++) {
- *gen_args = *args;
- args++;
- gen_args++;
+
+ CASE_OP_32_64(xor):
+ if (arg1 == arg2) {
+ arg1 = 0;
+ goto do_const_prop;
}
break;
+
default:
- /* Default case: we do know nothing about operation so no
- propagation is done. We only trash output args. */
- for (i = 0; i < def->nb_oargs; i++) {
- reset_temp(temps, args[i], nb_temps, nb_globals);
+ break;
+ }
+
+ /* Finish constant and copy propagation. */
+ switch (op) {
+ CASE_OP_32_64(mov):
+ do_copy_const_prop:
+ /* Notice copies back to self. */
+ if (arg0 == arg1) {
+ do_nop:
+ gen_opc_buf[op_index] = INDEX_op_nop;
+ break;
+ }
+
+ if (temps[arg1].state == TCG_TEMP_CONST) {
+ arg1 = temps[arg1].val;
+ goto do_const_prop;
}
- for (i = 0; i < def->nb_args; i++) {
- gen_args[i] = args[i];
+
+ /* Notice copies through another member of the equivalence. */
+ for (i = temps[arg1].next_copy; i != arg1; i = temps[i].next_copy) {
+ if (i == arg0) {
+ goto do_nop;
+ }
}
- args += def->nb_args;
- gen_args += def->nb_args;
+
+ /* Set up the equivalence class for copy propagation. */
+ reset_temp(arg0);
+ make_copy(arg0, arg1, nb_globals);
+
+ /* "Copy" the move down. Note that we get here by any path
+ that simplifies to a move, so OP may not itself be a move. */
+ gen_opc_buf[op_index] = op_to_mov(op);
+ gen_args[0] = arg0;
+ gen_args[1] = arg1;
+ gen_args += 2;
break;
+
+ CASE_OP_32_64(movi):
+ do_const_prop:
+ reset_temp(arg0);
+ temps[arg0].state = TCG_TEMP_CONST;
+ temps[arg0].val = arg1;
+
+ gen_opc_buf[op_index] = op_to_movi(op);
+ gen_args[0] = arg0;
+ gen_args[1] = arg1;
+ gen_args += 2;
+ break;
+
+ CASE_OP_32_64(add):
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(mul):
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ /* Given that the opcode is already as simple as we can make it,
+ and the result is not a simple copy or constant, the output
+ must now be variable. */
+ reset_temp(arg0);
+ gen_args[0] = arg0;
+ gen_args[1] = arg1;
+ gen_args[2] = arg2;
+ gen_args += 3;
+ break;
+
+ CASE_OP_32_64(not):
+ CASE_OP_32_64(neg):
+ CASE_OP_32_64(ext8s):
+ CASE_OP_32_64(ext8u):
+ CASE_OP_32_64(ext16s):
+ CASE_OP_32_64(ext16u):
+ reset_temp(arg0);
+ gen_args[0] = arg0;
+ gen_args[1] = arg1;
+ gen_args += 2;
+ break;
+
+ default:
+ tcg_abort();
}
+ args += def->nb_args;
}
return gen_args;
--
1.7.5.2
^ permalink raw reply related [flat|nested] 13+ messages in thread