* [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers
2013-01-11 23:42 [Qemu-devel] [PATCH 0/3] tcg-optimize with known-zero bits Richard Henderson
@ 2013-01-11 23:42 ` Richard Henderson
0 siblings, 0 replies; 3+ messages in thread
From: Richard Henderson @ 2013-01-11 23:42 UTC (permalink / raw)
To: qemu-devel; +Cc: Paolo Bonzini, Aurelien Jarno
From: Paolo Bonzini <pbonzini@redhat.com>
Add a "mask" field to the tcg_temp_info struct. A bit that is zero
in "mask" will always be zero in the corresponding temporary.
Zero bits in the mask can be produced from moves of immediates,
zero-extensions, ANDs with constants, shifts; they can then be
propagated by logical operations, shifts, sign-extensions,
negations, deposit operations, and conditional moves. Other
operations will just reset the mask to all-ones, i.e. unknown.
[rth: s/target_ulong/tcg_target_ulong/]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/optimize.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 110 insertions(+), 22 deletions(-)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9d05a72..090efbc 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -46,6 +46,7 @@ struct tcg_temp_info {
uint16_t prev_copy;
uint16_t next_copy;
tcg_target_ulong val;
+ tcg_target_ulong mask;
};
static struct tcg_temp_info temps[TCG_MAX_TEMPS];
@@ -63,6 +64,7 @@ static void reset_temp(TCGArg temp)
}
}
temps[temp].state = TCG_TEMP_UNDEF;
+ temps[temp].mask = -1;
}
/* Reset all temporaries, given that there are NB_TEMPS of them. */
@@ -71,6 +73,7 @@ static void reset_all_temps(int nb_temps)
int i;
for (i = 0; i < nb_temps; i++) {
temps[i].state = TCG_TEMP_UNDEF;
+ temps[i].mask = -1;
}
}
@@ -148,33 +151,35 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
TCGArg dst, TCGArg src)
{
- reset_temp(dst);
- assert(temps[src].state != TCG_TEMP_CONST);
-
- if (s->temps[src].type == s->temps[dst].type) {
- if (temps[src].state != TCG_TEMP_COPY) {
- temps[src].state = TCG_TEMP_COPY;
- temps[src].next_copy = src;
- temps[src].prev_copy = src;
- }
- temps[dst].state = TCG_TEMP_COPY;
- temps[dst].next_copy = temps[src].next_copy;
- temps[dst].prev_copy = src;
- temps[temps[dst].next_copy].prev_copy = dst;
- temps[src].next_copy = dst;
+ reset_temp(dst);
+ temps[dst].mask = temps[src].mask;
+ assert(temps[src].state != TCG_TEMP_CONST);
+
+ if (s->temps[src].type == s->temps[dst].type) {
+ if (temps[src].state != TCG_TEMP_COPY) {
+ temps[src].state = TCG_TEMP_COPY;
+ temps[src].next_copy = src;
+ temps[src].prev_copy = src;
}
+ temps[dst].state = TCG_TEMP_COPY;
+ temps[dst].next_copy = temps[src].next_copy;
+ temps[dst].prev_copy = src;
+ temps[temps[dst].next_copy].prev_copy = dst;
+ temps[src].next_copy = dst;
+ }
- gen_args[0] = dst;
- gen_args[1] = src;
+ gen_args[0] = dst;
+ gen_args[1] = src;
}
static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
- reset_temp(dst);
- temps[dst].state = TCG_TEMP_CONST;
- temps[dst].val = val;
- gen_args[0] = dst;
- gen_args[1] = val;
+ reset_temp(dst);
+ temps[dst].state = TCG_TEMP_CONST;
+ temps[dst].val = val;
+ temps[dst].mask = val;
+ gen_args[0] = dst;
+ gen_args[1] = val;
}
static TCGOpcode op_to_mov(TCGOpcode op)
@@ -479,6 +484,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
TCGArg *args, TCGOpDef *tcg_op_defs)
{
int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
+ tcg_target_ulong mask;
TCGOpcode op;
const TCGOpDef *def;
TCGArg *gen_args;
@@ -621,6 +627,87 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
break;
}
+ /* Simplify using known-zero bits */
+ mask = -1;
+ switch (op) {
+ CASE_OP_32_64(ext8s):
+ if ((temps[args[1]].mask & 0x80) != 0) {
+ break;
+ }
+ CASE_OP_32_64(ext8u):
+ mask = 0xff;
+ goto and_const;
+ CASE_OP_32_64(ext16s):
+ if ((temps[args[1]].mask & 0x8000) != 0) {
+ break;
+ }
+ CASE_OP_32_64(ext16u):
+ mask = 0xffff;
+ goto and_const;
+ case INDEX_op_ext32s_i64:
+ if ((temps[args[1]].mask & 0x80000000) != 0) {
+ break;
+ }
+ case INDEX_op_ext32u_i64:
+ mask = 0xffffffffU;
+ goto and_const;
+
+ CASE_OP_32_64(and):
+ mask = temps[args[2]].mask;
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ and_const:
+ ;
+ }
+ mask = temps[args[1]].mask & mask;
+ break;
+
+ CASE_OP_32_64(sar):
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = ((tcg_target_long)temps[args[1]].mask
+ >> temps[args[2]].val);
+ }
+ break;
+
+ CASE_OP_32_64(shr):
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = temps[args[1]].mask >> temps[args[2]].val;
+ }
+ break;
+
+ CASE_OP_32_64(shl):
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = temps[args[1]].mask << temps[args[2]].val;
+ }
+ break;
+
+ CASE_OP_32_64(neg):
+ /* Set to 1 all bits to the left of the rightmost. */
+ mask = -(temps[args[1]].mask & -temps[args[1]].mask);
+ break;
+
+ CASE_OP_32_64(deposit):
+ tmp = ((1ull << args[4]) - 1);
+ mask = ((temps[args[1]].mask & ~(tmp << args[3]))
+ | ((temps[args[2]].mask & tmp) << args[3]));
+ break;
+
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ mask = temps[args[1]].mask | temps[args[2]].mask;
+ break;
+
+ CASE_OP_32_64(setcond):
+ mask = 1;
+ break;
+
+ CASE_OP_32_64(movcond):
+ mask = temps[args[3]].mask | temps[args[4]].mask;
+ break;
+
+ default:
+ break;
+ }
+
/* Simplify expression for "op r, a, 0 => movi r, 0" cases */
switch (op) {
CASE_OP_32_64(and):
@@ -947,7 +1034,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Default case: we know nothing about operation (or were unable
to compute the operation result) so no propagation is done.
We trash everything if the operation is the end of a basic
- block, otherwise we only trash the output args. */
+ block, otherwise we only trash the output args. "mask" is
+ the non-zero bits mask for the first output arg. */
if (def->flags & TCG_OPF_BB_END) {
reset_all_temps(nb_temps);
} else {
--
1.7.11.7
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers
@ 2013-01-16 9:09 Jay Foad
2013-01-16 10:47 ` Paolo Bonzini
0 siblings, 1 reply; 3+ messages in thread
From: Jay Foad @ 2013-01-16 9:09 UTC (permalink / raw)
To: Richard Henderson; +Cc: Paolo Bonzini, qemu-devel
> @@ -621,6 +627,87 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
> uint16_t *tcg_opc_ptr,
> break;
> }
>
> + /* Simplify using known-zero bits */
> + mask = -1;
> + switch (op) {
> + CASE_OP_32_64(ext8s):
> + if ((temps[args[1]].mask & 0x80) != 0) {
> + break;
> + }
> + CASE_OP_32_64(ext8u):
> + mask = 0xff;
> + goto and_const;
> + CASE_OP_32_64(ext16s):
> + if ((temps[args[1]].mask & 0x8000) != 0) {
> + break;
> + }
> + CASE_OP_32_64(ext16u):
> + mask = 0xffff;
> + goto and_const;
> + case INDEX_op_ext32s_i64:
> + if ((temps[args[1]].mask & 0x80000000) != 0) {
> + break;
> + }
> + case INDEX_op_ext32u_i64:
> + mask = 0xffffffffU;
> + goto and_const;
> +
> + CASE_OP_32_64(and):
> + mask = temps[args[2]].mask;
> + if (temps[args[2]].state == TCG_TEMP_CONST) {
> + and_const:
> + ;
> + }
What's the purpose of this "if"?
Jay.
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers
2013-01-16 9:09 [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers Jay Foad
@ 2013-01-16 10:47 ` Paolo Bonzini
0 siblings, 0 replies; 3+ messages in thread
From: Paolo Bonzini @ 2013-01-16 10:47 UTC (permalink / raw)
To: Jay Foad; +Cc: qemu-devel, Richard Henderson
Il 16/01/2013 10:09, Jay Foad ha scritto:
>> @@ -621,6 +627,87 @@ static TCGArg *tcg_constant_folding(TCGContext *s,
>> uint16_t *tcg_opc_ptr,
>> break;
>> }
>>
>> + /* Simplify using known-zero bits */
>> + mask = -1;
>> + switch (op) {
>> + CASE_OP_32_64(ext8s):
>> + if ((temps[args[1]].mask & 0x80) != 0) {
>> + break;
>> + }
>> + CASE_OP_32_64(ext8u):
>> + mask = 0xff;
>> + goto and_const;
>> + CASE_OP_32_64(ext16s):
>> + if ((temps[args[1]].mask & 0x8000) != 0) {
>> + break;
>> + }
>> + CASE_OP_32_64(ext16u):
>> + mask = 0xffff;
>> + goto and_const;
>> + case INDEX_op_ext32s_i64:
>> + if ((temps[args[1]].mask & 0x80000000) != 0) {
>> + break;
>> + }
>> + case INDEX_op_ext32u_i64:
>> + mask = 0xffffffffU;
>> + goto and_const;
>> +
>> + CASE_OP_32_64(and):
>> + mask = temps[args[2]].mask;
>> + if (temps[args[2]].state == TCG_TEMP_CONST) {
>> + and_const:
>> + ;
>> + }
>
> What's the purpose of this "if"?
It is filled in by patch 3/3.
Paolo
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2013-01-16 12:11 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-16 9:09 [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers Jay Foad
2013-01-16 10:47 ` Paolo Bonzini
-- strict thread matches above, loose matches on Subject: below --
2013-01-11 23:42 [Qemu-devel] [PATCH 0/3] tcg-optimize with known-zero bits Richard Henderson
2013-01-11 23:42 ` [Qemu-devel] [PATCH 2/3] optimize: track nonzero bits of registers Richard Henderson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).