From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: mark.cave-ayland@ilande.co.uk, aurelien@aurel32.net, atar4qemu@gmail.com
Subject: [Qemu-devel] [PATCH v3 7/9] tcg: Compress dead_temps and mem_temps into a single array
Date: Thu, 23 Jun 2016 20:48:25 -0700 [thread overview]
Message-ID: <1466740107-15042-8-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1466740107-15042-1-git-send-email-rth@twiddle.net>
We only need two bits per temporary. Fold the two bytes into one,
and reduce the memory and cachelines required during compilation.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/tcg.c | 118 +++++++++++++++++++++++++++++++-------------------------------
1 file changed, 59 insertions(+), 59 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index c41640f..fd92b06 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -332,7 +332,7 @@ void tcg_context_init(TCGContext *s)
memset(s, 0, sizeof(*s));
s->nb_globals = 0;
-
+
/* Count total number of arguments and allocate the corresponding
space */
total_args = 0;
@@ -824,16 +824,16 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
real_args++;
}
#endif
- /* If stack grows up, then we will be placing successive
- arguments at lower addresses, which means we need to
- reverse the order compared to how we would normally
- treat either big or little-endian. For those arguments
- that will wind up in registers, this still works for
- HPPA (the only current STACK_GROWSUP target) since the
- argument registers are *also* allocated in decreasing
- order. If another such target is added, this logic may
- have to get more complicated to differentiate between
- stack arguments and register arguments. */
+ /* If stack grows up, then we will be placing successive
+ arguments at lower addresses, which means we need to
+ reverse the order compared to how we would normally
+ treat either big or little-endian. For those arguments
+ that will wind up in registers, this still works for
+ HPPA (the only current STACK_GROWSUP target) since the
+ argument registers are *also* allocated in decreasing
+ order. If another such target is added, this logic may
+ have to get more complicated to differentiate between
+ stack arguments and register arguments. */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
s->gen_opparam_buf[pi++] = args[i] + 1;
s->gen_opparam_buf[pi++] = args[i];
@@ -1297,27 +1297,28 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
#endif
}
+#define TS_DEAD 1
+#define TS_SYNC 2
+
/* liveness analysis: end of function: all temps are dead, and globals
should be in memory. */
-static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
- uint8_t *mem_temps)
+static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
{
- memset(dead_temps, 1, s->nb_temps);
- memset(mem_temps, 1, s->nb_globals);
- memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
+ memset(temp_state, TS_DEAD | TS_SYNC, s->nb_globals);
+ memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
}
/* liveness analysis: end of basic block: all temps are dead, globals
and local temps should be in memory. */
-static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
- uint8_t *mem_temps)
+static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
{
- int i;
+ int i, n;
- memset(dead_temps, 1, s->nb_temps);
- memset(mem_temps, 1, s->nb_globals);
- for(i = s->nb_globals; i < s->nb_temps; i++) {
- mem_temps[i] = s->temps[i].temp_local;
+ tcg_la_func_end(s, temp_state);
+ for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
+ if (s->temps[i].temp_local) {
+ temp_state[i] |= TS_SYNC;
+ }
}
}
@@ -1326,12 +1327,12 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
temporaries are removed. */
static void tcg_liveness_analysis(TCGContext *s)
{
- uint8_t *dead_temps, *mem_temps;
+ uint8_t *temp_state;
int oi, oi_prev;
+ int nb_globals = s->nb_globals;
- dead_temps = tcg_malloc(s->nb_temps);
- mem_temps = tcg_malloc(s->nb_temps);
- tcg_la_func_end(s, dead_temps, mem_temps);
+ temp_state = tcg_malloc(s->nb_temps);
+ tcg_la_func_end(s, temp_state);
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
int i, nb_iargs, nb_oargs;
@@ -1360,7 +1361,7 @@ static void tcg_liveness_analysis(TCGContext *s)
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (!dead_temps[arg] || mem_temps[arg]) {
+ if (temp_state[arg] != TS_DEAD) {
goto do_not_remove_call;
}
}
@@ -1371,39 +1372,41 @@ static void tcg_liveness_analysis(TCGContext *s)
/* output args are dead */
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (dead_temps[arg]) {
+ if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
- if (mem_temps[arg]) {
+ if (temp_state[arg] & TS_SYNC) {
arg_life |= SYNC_ARG << i;
}
- dead_temps[arg] = 1;
- mem_temps[arg] = 0;
+ temp_state[arg] = TS_DEAD;
}
- if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
- /* globals should be synced to memory */
- memset(mem_temps, 1, s->nb_globals);
- }
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
TCG_CALL_NO_READ_GLOBALS))) {
/* globals should go back to memory */
- memset(dead_temps, 1, s->nb_globals);
+ memset(temp_state, TS_DEAD | TS_SYNC, nb_globals);
+ } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+ /* globals should be synced to memory */
+ for (i = 0; i < nb_globals; i++) {
+ temp_state[i] |= TS_SYNC;
+ }
}
/* record arguments that die in this helper */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i];
if (arg != TCG_CALL_DUMMY_ARG) {
- if (dead_temps[arg]) {
+ if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
}
}
/* input arguments are live for preceding opcodes */
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i];
- dead_temps[arg] = 0;
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ temp_state[arg] &= ~TS_DEAD;
+ }
}
}
}
@@ -1412,8 +1415,7 @@ static void tcg_liveness_analysis(TCGContext *s)
break;
case INDEX_op_discard:
/* mark the temporary as dead */
- dead_temps[args[0]] = 1;
- mem_temps[args[0]] = 0;
+ temp_state[args[0]] = TS_DEAD;
break;
case INDEX_op_add2_i32:
@@ -1434,8 +1436,8 @@ static void tcg_liveness_analysis(TCGContext *s)
the low part. The result can be optimized to a simple
add or sub. This happens often for x86_64 guest when the
cpu mode is set to 32 bit. */
- if (dead_temps[args[1]] && !mem_temps[args[1]]) {
- if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ if (temp_state[args[1]] == TS_DEAD) {
+ if (temp_state[args[0]] == TS_DEAD) {
goto do_remove;
}
/* Replace the opcode and adjust the args in place,
@@ -1472,8 +1474,8 @@ static void tcg_liveness_analysis(TCGContext *s)
do_mul2:
nb_iargs = 2;
nb_oargs = 2;
- if (dead_temps[args[1]] && !mem_temps[args[1]]) {
- if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ if (temp_state[args[1]] == TS_DEAD) {
+ if (temp_state[args[0]] == TS_DEAD) {
/* Both parts of the operation are dead. */
goto do_remove;
}
@@ -1481,8 +1483,7 @@ static void tcg_liveness_analysis(TCGContext *s)
op->opc = opc = opc_new;
args[1] = args[2];
args[2] = args[3];
- } else if (have_opc_new2 && dead_temps[args[0]]
- && !mem_temps[args[0]]) {
+ } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
/* The low part of the operation is dead; generate the high. */
op->opc = opc = opc_new2;
args[0] = args[1];
@@ -1505,8 +1506,7 @@ static void tcg_liveness_analysis(TCGContext *s)
implies side effects */
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
for (i = 0; i < nb_oargs; i++) {
- arg = args[i];
- if (!dead_temps[arg] || mem_temps[arg]) {
+ if (temp_state[args[i]] != TS_DEAD) {
goto do_not_remove;
}
}
@@ -1517,35 +1517,35 @@ static void tcg_liveness_analysis(TCGContext *s)
/* output args are dead */
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (dead_temps[arg]) {
+ if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
- if (mem_temps[arg]) {
+ if (temp_state[arg] & TS_SYNC) {
arg_life |= SYNC_ARG << i;
}
- dead_temps[arg] = 1;
- mem_temps[arg] = 0;
+ temp_state[arg] = TS_DEAD;
}
/* if end of basic block, update */
if (def->flags & TCG_OPF_BB_END) {
- tcg_la_bb_end(s, dead_temps, mem_temps);
+ tcg_la_bb_end(s, temp_state);
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
/* globals should be synced to memory */
- memset(mem_temps, 1, s->nb_globals);
+ for (i = 0; i < nb_globals; i++) {
+ temp_state[i] |= TS_SYNC;
+ }
}
/* record arguments that die in this opcode */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i];
- if (dead_temps[arg]) {
+ if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
}
/* input arguments are live for preceding opcodes */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
- arg = args[i];
- dead_temps[arg] = 0;
+ temp_state[args[i]] &= ~TS_DEAD;
}
}
break;
--
2.5.5
next prev parent reply other threads:[~2016-06-24 3:48 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-06-24 3:48 [Qemu-devel] [PATCH v3 0/9] Third try at fixing sparc register allocation Richard Henderson
2016-06-24 3:48 ` [Qemu-devel] [PATCH v3 1/9] tcg: Fix name for high-half register Richard Henderson
2016-07-25 11:23 ` Aurelien Jarno
2016-06-24 3:48 ` [Qemu-devel] [PATCH v3 2/9] tcg: Optimize spills of constants Richard Henderson
2016-06-24 3:48 ` [Qemu-devel] [PATCH v3 3/9] tcg: Require liveness analysis Richard Henderson
2016-07-25 11:23 ` Aurelien Jarno
2016-06-24 3:48 ` [Qemu-devel] [PATCH v3 4/9] tcg: Compress liveness data to 16 bits Richard Henderson
2016-07-25 11:23 ` Aurelien Jarno
2016-06-24 3:48 ` [Qemu-devel] [PATCH v3 5/9] tcg: Reorg TCGOp chaining Richard Henderson
2016-07-25 11:23 ` Aurelien Jarno
2016-08-03 18:08 ` Richard Henderson
2016-06-24 3:48 ` [Qemu-devel] [PATCH v3 6/9] tcg: Fold life data into TCGOp Richard Henderson
2016-07-25 14:07 ` Aurelien Jarno
2016-06-24 3:48 ` Richard Henderson [this message]
2016-07-25 15:15 ` [Qemu-devel] [PATCH v3 7/9] tcg: Compress dead_temps and mem_temps into a single array Aurelien Jarno
2016-08-03 18:22 ` Richard Henderson
2016-06-24 3:48 ` [Qemu-devel] [PATCH v3 8/9] tcg: Include liveness info in the dumps Richard Henderson
2016-07-25 15:15 ` Aurelien Jarno
2016-07-25 16:16 ` Aurelien Jarno
2016-06-24 3:48 ` [Qemu-devel] [PATCH v3 9/9] tcg: Lower indirect registers in a separate pass Richard Henderson
2016-07-25 19:23 ` Aurelien Jarno
2016-08-03 19:27 ` Richard Henderson
2016-06-24 10:31 ` [Qemu-devel] [PATCH v3 0/9] Third try at fixing sparc register allocation Mark Cave-Ayland
2016-07-19 3:39 ` Richard Henderson
2016-07-22 13:40 ` Aurelien Jarno
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1466740107-15042-8-git-send-email-rth@twiddle.net \
--to=rth@twiddle.net \
--cc=atar4qemu@gmail.com \
--cc=aurelien@aurel32.net \
--cc=mark.cave-ayland@ilande.co.uk \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).