* [Qemu-devel] [PATCH v4 for-2.7 1/7] tcg: Compress liveness data to 16 bits
2016-08-04 16:26 [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Richard Henderson
@ 2016-08-04 16:26 ` Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 2/7] tcg: Reorg TCGOp chaining Richard Henderson
` (6 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2016-08-04 16:26 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien, peter.maydell
This reduces both memory usage and per-insn cacheline usage
during code generation.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/tcg.c | 58 ++++++++++++++++++++++------------------------------------
tcg/tcg.h | 16 ++++++++++------
2 files changed, 32 insertions(+), 42 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0c46c43..4aa1933 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1341,7 +1341,7 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
}
}
-/* Liveness analysis : update the opc_dead_args array to tell if a
+/* Liveness analysis : update the opc_arg_life array to tell if a
given input arguments is dead. Instructions updating dead
temporaries are removed. */
static void tcg_liveness_analysis(TCGContext *s)
@@ -1350,9 +1350,8 @@ static void tcg_liveness_analysis(TCGContext *s)
int oi, oi_prev, nb_ops;
nb_ops = s->gen_next_op_idx;
- s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
- s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
-
+ s->op_arg_life = tcg_malloc(nb_ops * sizeof(TCGLifeData));
+
dead_temps = tcg_malloc(s->nb_temps);
mem_temps = tcg_malloc(s->nb_temps);
tcg_la_func_end(s, dead_temps, mem_temps);
@@ -1361,8 +1360,7 @@ static void tcg_liveness_analysis(TCGContext *s)
int i, nb_iargs, nb_oargs;
TCGOpcode opc_new, opc_new2;
bool have_opc_new2;
- uint16_t dead_args;
- uint8_t sync_args;
+ TCGLifeData arg_life = 0;
TCGArg arg;
TCGOp * const op = &s->gen_op_buf[oi];
@@ -1394,15 +1392,13 @@ static void tcg_liveness_analysis(TCGContext *s)
do_not_remove_call:
/* output args are dead */
- dead_args = 0;
- sync_args = 0;
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ arg_life |= DEAD_ARG << i;
}
if (mem_temps[arg]) {
- sync_args |= (1 << i);
+ arg_life |= SYNC_ARG << i;
}
dead_temps[arg] = 1;
mem_temps[arg] = 0;
@@ -1423,7 +1419,7 @@ static void tcg_liveness_analysis(TCGContext *s)
arg = args[i];
if (arg != TCG_CALL_DUMMY_ARG) {
if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ arg_life |= DEAD_ARG << i;
}
}
}
@@ -1432,8 +1428,6 @@ static void tcg_liveness_analysis(TCGContext *s)
arg = args[i];
dead_temps[arg] = 0;
}
- s->op_dead_args[oi] = dead_args;
- s->op_sync_args[oi] = sync_args;
}
}
break;
@@ -1544,15 +1538,13 @@ static void tcg_liveness_analysis(TCGContext *s)
} else {
do_not_remove:
/* output args are dead */
- dead_args = 0;
- sync_args = 0;
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ arg_life |= DEAD_ARG << i;
}
if (mem_temps[arg]) {
- sync_args |= (1 << i);
+ arg_life |= SYNC_ARG << i;
}
dead_temps[arg] = 1;
mem_temps[arg] = 0;
@@ -1570,7 +1562,7 @@ static void tcg_liveness_analysis(TCGContext *s)
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
- dead_args |= (1 << i);
+ arg_life |= DEAD_ARG << i;
}
}
/* input arguments are live for preceding opcodes */
@@ -1578,11 +1570,10 @@ static void tcg_liveness_analysis(TCGContext *s)
arg = args[i];
dead_temps[arg] = 0;
}
- s->op_dead_args[oi] = dead_args;
- s->op_sync_args[oi] = sync_args;
}
break;
}
+ s->op_arg_life[oi] = arg_life;
}
}
#else
@@ -1921,11 +1912,11 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
save_globals(s, allocated_regs);
}
-#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
-#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
+#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
+#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
- uint16_t dead_args, uint8_t sync_args)
+ TCGLifeData arg_life)
{
TCGTemp *ots;
tcg_target_ulong val;
@@ -1954,8 +1945,7 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
}
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
- const TCGArg *args, uint16_t dead_args,
- uint8_t sync_args)
+ const TCGArg *args, TCGLifeData arg_life)
{
TCGRegSet allocated_regs;
TCGTemp *ts, *ots;
@@ -2040,8 +2030,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
static void tcg_reg_alloc_op(TCGContext *s,
const TCGOpDef *def, TCGOpcode opc,
- const TCGArg *args, uint16_t dead_args,
- uint8_t sync_args)
+ const TCGArg *args, TCGLifeData arg_life)
{
TCGRegSet allocated_regs;
int i, k, nb_iargs, nb_oargs;
@@ -2206,8 +2195,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
#endif
static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
- const TCGArg * const args, uint16_t dead_args,
- uint8_t sync_args)
+ const TCGArg * const args, TCGLifeData arg_life)
{
int flags, nb_regs, i;
TCGReg reg;
@@ -2427,8 +2415,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
TCGArg * const args = &s->gen_opparam_buf[op->args];
TCGOpcode opc = op->opc;
const TCGOpDef *def = &tcg_op_defs[opc];
- uint16_t dead_args = s->op_dead_args[oi];
- uint8_t sync_args = s->op_sync_args[oi];
+ TCGLifeData arg_life = s->op_arg_life[oi];
oi_next = op->next;
#ifdef CONFIG_PROFILER
@@ -2438,11 +2425,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
switch (opc) {
case INDEX_op_mov_i32:
case INDEX_op_mov_i64:
- tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
+ tcg_reg_alloc_mov(s, def, args, arg_life);
break;
case INDEX_op_movi_i32:
case INDEX_op_movi_i64:
- tcg_reg_alloc_movi(s, args, dead_args, sync_args);
+ tcg_reg_alloc_movi(s, args, arg_life);
break;
case INDEX_op_insn_start:
if (num_insns >= 0) {
@@ -2467,8 +2454,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
tcg_out_label(s, arg_label(args[0]), s->code_ptr);
break;
case INDEX_op_call:
- tcg_reg_alloc_call(s, op->callo, op->calli, args,
- dead_args, sync_args);
+ tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life);
break;
default:
/* Sanity check that we've not introduced any unhandled opcodes. */
@@ -2478,7 +2464,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
/* Note: in order to speed up the code, it would be much
faster to have specialized register allocator functions for
some common argument patterns */
- tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
+ tcg_reg_alloc_op(s, def, opc, args, arg_life);
break;
}
#ifdef CONFIG_DEBUG_TCG
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6046dcd..7c0a138 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -575,6 +575,14 @@ typedef struct TCGTempSet {
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
} TCGTempSet;
+/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
this implies a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
+ There are never more than 2 outputs, which means that we can store all
+ dead + sync data within 16 bits. */
+#define DEAD_ARG 4
+#define SYNC_ARG 1
+typedef uint16_t TCGLifeData;
+
typedef struct TCGOp {
TCGOpcode opc : 8;
@@ -608,12 +616,8 @@ struct TCGContext {
uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */
/* liveness analysis */
- uint16_t *op_dead_args; /* for each operation, each bit tells if the
- corresponding argument is dead */
- uint8_t *op_sync_args; /* for each operation, each bit tells if the
- corresponding output argument needs to be
- sync to memory. */
-
+ TCGLifeData *op_arg_life;
+
TCGRegSet reserved_regs;
intptr_t current_frame_offset;
intptr_t frame_start;
--
2.7.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v4 for-2.7 2/7] tcg: Reorg TCGOp chaining
2016-08-04 16:26 [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 1/7] tcg: Compress liveness data to 16 bits Richard Henderson
@ 2016-08-04 16:26 ` Richard Henderson
2016-08-05 11:58 ` Aurelien Jarno
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 3/7] tcg: Fold life data into TCGOp Richard Henderson
` (5 subsequent siblings)
7 siblings, 1 reply; 11+ messages in thread
From: Richard Henderson @ 2016-08-04 16:26 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien, peter.maydell
Instead of using -1 as the end-of-chain marker, use 0, and link through
entry 0 so that the ops form a fully circular doubly-linked list.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
include/exec/gen-icount.h | 2 +-
tcg/optimize.c | 8 ++------
tcg/tcg-op.c | 2 +-
tcg/tcg.c | 35 +++++++++++++++--------------------
tcg/tcg.h | 22 ++++++++++++----------
5 files changed, 31 insertions(+), 38 deletions(-)
diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
index 1af03d8..050de59 100644
--- a/include/exec/gen-icount.h
+++ b/include/exec/gen-icount.h
@@ -59,7 +59,7 @@ static void gen_tb_end(TranslationBlock *tb, int num_insns)
}
/* Terminate the linked list. */
- tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
+ tcg_ctx.gen_op_buf[tcg_ctx.gen_op_buf[0].prev].next = 0;
}
static inline void gen_io_start(void)
diff --git a/tcg/optimize.c b/tcg/optimize.c
index c0d975b..8df7fc7 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -103,11 +103,7 @@ static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
.prev = prev,
.next = next
};
- if (prev >= 0) {
- s->gen_op_buf[prev].next = oi;
- } else {
- s->gen_first_op_idx = oi;
- }
+ s->gen_op_buf[prev].next = oi;
old_op->prev = oi;
return new_op;
@@ -583,7 +579,7 @@ void tcg_optimize(TCGContext *s)
nb_globals = s->nb_globals;
reset_all_temps(nb_temps);
- for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
+ for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
tcg_target_ulong mask, partmask, affected;
int nb_oargs, nb_iargs, i;
TCGArg tmp;
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 293b854..0243c99 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -52,7 +52,7 @@ static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
int pi = oi - 1;
tcg_debug_assert(oi < OPC_BUF_SIZE);
- ctx->gen_last_op_idx = oi;
+ ctx->gen_op_buf[0].prev = oi;
ctx->gen_next_op_idx = ni;
ctx->gen_op_buf[oi] = (TCGOp){
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 4aa1933..cd76e42 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -438,9 +438,9 @@ void tcg_func_start(TCGContext *s)
s->goto_tb_issue_mask = 0;
#endif
- s->gen_first_op_idx = 0;
- s->gen_last_op_idx = -1;
- s->gen_next_op_idx = 0;
+ s->gen_op_buf[0].next = 1;
+ s->gen_op_buf[0].prev = 0;
+ s->gen_next_op_idx = 1;
s->gen_next_parm_idx = 0;
s->be = tcg_malloc(sizeof(TCGBackendData));
@@ -869,7 +869,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
/* Make sure the calli field didn't overflow. */
tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
- s->gen_last_op_idx = i;
+ s->gen_op_buf[0].prev = i;
s->gen_next_op_idx = i + 1;
s->gen_next_parm_idx = pi;
@@ -1021,7 +1021,7 @@ void tcg_dump_ops(TCGContext *s)
TCGOp *op;
int oi;
- for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
+ for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
int i, k, nb_oargs, nb_iargs, nb_cargs;
const TCGOpDef *def;
const TCGArg *args;
@@ -1033,7 +1033,7 @@ void tcg_dump_ops(TCGContext *s)
args = &s->gen_opparam_buf[op->args];
if (c == INDEX_op_insn_start) {
- qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
+ qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
target_ulong a;
@@ -1298,18 +1298,13 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
int next = op->next;
int prev = op->prev;
- if (next >= 0) {
- s->gen_op_buf[next].prev = prev;
- } else {
- s->gen_last_op_idx = prev;
- }
- if (prev >= 0) {
- s->gen_op_buf[prev].next = next;
- } else {
- s->gen_first_op_idx = next;
- }
+ /* We should never attempt to remove the list terminator. */
+ tcg_debug_assert(op != &s->gen_op_buf[0]);
+
+ s->gen_op_buf[next].prev = prev;
+ s->gen_op_buf[prev].next = next;
- memset(op, -1, sizeof(*op));
+ memset(op, 0, sizeof(*op));
#ifdef CONFIG_PROFILER
s->del_op_count++;
@@ -1356,7 +1351,7 @@ static void tcg_liveness_analysis(TCGContext *s)
mem_temps = tcg_malloc(s->nb_temps);
tcg_la_func_end(s, dead_temps, mem_temps);
- for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
+ for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
int i, nb_iargs, nb_oargs;
TCGOpcode opc_new, opc_new2;
bool have_opc_new2;
@@ -2351,7 +2346,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
int n;
- n = s->gen_last_op_idx + 1;
+ n = s->gen_op_buf[0].prev + 1;
s->op_count += n;
if (n > s->op_count_max) {
s->op_count_max = n;
@@ -2410,7 +2405,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
tcg_out_tb_init(s);
num_insns = -1;
- for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
+ for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
TCGOp * const op = &s->gen_op_buf[oi];
TCGArg * const args = &s->gen_opparam_buf[op->args];
TCGOpcode opc = op->opc;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 7c0a138..007d7bc 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -590,17 +590,21 @@ typedef struct TCGOp {
unsigned callo : 2;
unsigned calli : 6;
- /* Index of the arguments for this op, or -1 for zero-operand ops. */
- signed args : 16;
+ /* Index of the arguments for this op, or 0 for zero-operand ops. */
+ unsigned args : 16;
- /* Index of the prex/next op, or -1 for the end of the list. */
- signed prev : 16;
- signed next : 16;
+ /* Index of the prev/next op, or 0 for the end of the list. */
+ unsigned prev : 16;
+ unsigned next : 16;
} TCGOp;
-QEMU_BUILD_BUG_ON(NB_OPS > 0xff);
-QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff);
-QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff);
+/* Make sure operands fit in the bitfields above. */
+QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
+QEMU_BUILD_BUG_ON(OPC_BUF_SIZE > (1 << 16));
+QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE > (1 << 16));
+
+/* Make sure that we don't overflow 64 bits without noticing. */
+QEMU_BUILD_BUG_ON(sizeof(TCGOp) > 8);
struct TCGContext {
uint8_t *pool_cur, *pool_end;
@@ -653,8 +657,6 @@ struct TCGContext {
int goto_tb_issue_mask;
#endif
- int gen_first_op_idx;
- int gen_last_op_idx;
int gen_next_op_idx;
int gen_next_parm_idx;
--
2.7.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v4 for-2.7 3/7] tcg: Fold life data into TCGOp
2016-08-04 16:26 [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 1/7] tcg: Compress liveness data to 16 bits Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 2/7] tcg: Reorg TCGOp chaining Richard Henderson
@ 2016-08-04 16:26 ` Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 4/7] tcg: Compress dead_temps and mem_temps into a single array Richard Henderson
` (4 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2016-08-04 16:26 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien, peter.maydell
Reduce the size of the other bitfields to make room for the 16-bit life
data inside TCGOp, keeping the structure at 64 bits.
This reduces the cache footprint of compilation.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/tcg.c | 9 +++------
tcg/tcg.h | 26 ++++++++++++++------------
2 files changed, 17 insertions(+), 18 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index cd76e42..6bcf6e5 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1342,10 +1342,7 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
static void tcg_liveness_analysis(TCGContext *s)
{
uint8_t *dead_temps, *mem_temps;
- int oi, oi_prev, nb_ops;
-
- nb_ops = s->gen_next_op_idx;
- s->op_arg_life = tcg_malloc(nb_ops * sizeof(TCGLifeData));
+ int oi, oi_prev;
dead_temps = tcg_malloc(s->nb_temps);
mem_temps = tcg_malloc(s->nb_temps);
@@ -1568,7 +1565,7 @@ static void tcg_liveness_analysis(TCGContext *s)
}
break;
}
- s->op_arg_life[oi] = arg_life;
+ op->life = arg_life;
}
}
#else
@@ -2410,7 +2407,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
TCGArg * const args = &s->gen_opparam_buf[op->args];
TCGOpcode opc = op->opc;
const TCGOpDef *def = &tcg_op_defs[opc];
- TCGLifeData arg_life = s->op_arg_life[oi];
+ TCGLifeData arg_life = op->life;
oi_next = op->next;
#ifdef CONFIG_PROFILER
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 007d7bc..ebf6867 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -583,25 +583,30 @@ typedef struct TCGTempSet {
#define SYNC_ARG 1
typedef uint16_t TCGLifeData;
+/* The layout here is designed to avoid crossing of a 32-bit boundary.
+ If we do so, gcc adds padding, expanding the size to 12. */
typedef struct TCGOp {
- TCGOpcode opc : 8;
+ TCGOpcode opc : 8; /* 8 */
+
+ /* Index of the prev/next op, or 0 for the end of the list. */
+ unsigned prev : 10; /* 18 */
+ unsigned next : 10; /* 28 */
/* The number of out and in parameter for a call. */
- unsigned callo : 2;
- unsigned calli : 6;
+ unsigned calli : 4; /* 32 */
+ unsigned callo : 2; /* 34 */
/* Index of the arguments for this op, or 0 for zero-operand ops. */
- unsigned args : 16;
+ unsigned args : 14; /* 48 */
- /* Index of the prev/next op, or 0 for the end of the list. */
- unsigned prev : 16;
- unsigned next : 16;
+ /* Lifetime data of the operands. */
+ unsigned life : 16; /* 64 */
} TCGOp;
/* Make sure operands fit in the bitfields above. */
QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
-QEMU_BUILD_BUG_ON(OPC_BUF_SIZE > (1 << 16));
-QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE > (1 << 16));
+QEMU_BUILD_BUG_ON(OPC_BUF_SIZE > (1 << 10));
+QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE > (1 << 14));
/* Make sure that we don't overflow 64 bits without noticing. */
QEMU_BUILD_BUG_ON(sizeof(TCGOp) > 8);
@@ -619,9 +624,6 @@ struct TCGContext {
uint16_t *tb_jmp_insn_offset; /* tb->jmp_insn_offset if USE_DIRECT_JUMP */
uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */
- /* liveness analysis */
- TCGLifeData *op_arg_life;
-
TCGRegSet reserved_regs;
intptr_t current_frame_offset;
intptr_t frame_start;
--
2.7.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v4 for-2.7 4/7] tcg: Compress dead_temps and mem_temps into a single array
2016-08-04 16:26 [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Richard Henderson
` (2 preceding siblings ...)
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 3/7] tcg: Fold life data into TCGOp Richard Henderson
@ 2016-08-04 16:26 ` Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 5/7] tcg: Include liveness info in the dumps Richard Henderson
` (3 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2016-08-04 16:26 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien, peter.maydell
We only need two bits per temporary. Fold the two bytes into one,
and reduce the memory and cachelines required during compilation.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/tcg.c | 119 +++++++++++++++++++++++++++++++-------------------------------
1 file changed, 60 insertions(+), 59 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 6bcf6e5..27bbb4d 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -333,7 +333,7 @@ void tcg_context_init(TCGContext *s)
memset(s, 0, sizeof(*s));
s->nb_globals = 0;
-
+
/* Count total number of arguments and allocate the corresponding
space */
total_args = 0;
@@ -825,16 +825,16 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
real_args++;
}
#endif
- /* If stack grows up, then we will be placing successive
- arguments at lower addresses, which means we need to
- reverse the order compared to how we would normally
- treat either big or little-endian. For those arguments
- that will wind up in registers, this still works for
- HPPA (the only current STACK_GROWSUP target) since the
- argument registers are *also* allocated in decreasing
- order. If another such target is added, this logic may
- have to get more complicated to differentiate between
- stack arguments and register arguments. */
+ /* If stack grows up, then we will be placing successive
+ arguments at lower addresses, which means we need to
+ reverse the order compared to how we would normally
+ treat either big or little-endian. For those arguments
+ that will wind up in registers, this still works for
+ HPPA (the only current STACK_GROWSUP target) since the
+ argument registers are *also* allocated in decreasing
+ order. If another such target is added, this logic may
+ have to get more complicated to differentiate between
+ stack arguments and register arguments. */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
s->gen_opparam_buf[pi++] = args[i] + 1;
s->gen_opparam_buf[pi++] = args[i];
@@ -1312,27 +1312,29 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
}
#ifdef USE_LIVENESS_ANALYSIS
+
+#define TS_DEAD 1
+#define TS_MEM 2
+
/* liveness analysis: end of function: all temps are dead, and globals
should be in memory. */
-static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
- uint8_t *mem_temps)
+static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
{
- memset(dead_temps, 1, s->nb_temps);
- memset(mem_temps, 1, s->nb_globals);
- memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
+ memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
+ memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
}
/* liveness analysis: end of basic block: all temps are dead, globals
and local temps should be in memory. */
-static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
- uint8_t *mem_temps)
+static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
{
- int i;
+ int i, n;
- memset(dead_temps, 1, s->nb_temps);
- memset(mem_temps, 1, s->nb_globals);
- for(i = s->nb_globals; i < s->nb_temps; i++) {
- mem_temps[i] = s->temps[i].temp_local;
+ tcg_la_func_end(s, temp_state);
+ for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
+ if (s->temps[i].temp_local) {
+ temp_state[i] |= TS_MEM;
+ }
}
}
@@ -1341,12 +1343,12 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
temporaries are removed. */
static void tcg_liveness_analysis(TCGContext *s)
{
- uint8_t *dead_temps, *mem_temps;
+ uint8_t *temp_state;
int oi, oi_prev;
+ int nb_globals = s->nb_globals;
- dead_temps = tcg_malloc(s->nb_temps);
- mem_temps = tcg_malloc(s->nb_temps);
- tcg_la_func_end(s, dead_temps, mem_temps);
+ temp_state = tcg_malloc(s->nb_temps);
+ tcg_la_func_end(s, temp_state);
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
int i, nb_iargs, nb_oargs;
@@ -1375,7 +1377,7 @@ static void tcg_liveness_analysis(TCGContext *s)
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (!dead_temps[arg] || mem_temps[arg]) {
+ if (temp_state[arg] != TS_DEAD) {
goto do_not_remove_call;
}
}
@@ -1386,39 +1388,41 @@ static void tcg_liveness_analysis(TCGContext *s)
/* output args are dead */
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (dead_temps[arg]) {
+ if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
- if (mem_temps[arg]) {
+ if (temp_state[arg] & TS_MEM) {
arg_life |= SYNC_ARG << i;
}
- dead_temps[arg] = 1;
- mem_temps[arg] = 0;
+ temp_state[arg] = TS_DEAD;
}
- if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
- /* globals should be synced to memory */
- memset(mem_temps, 1, s->nb_globals);
- }
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
TCG_CALL_NO_READ_GLOBALS))) {
/* globals should go back to memory */
- memset(dead_temps, 1, s->nb_globals);
+ memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
+ } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+ /* globals should be synced to memory */
+ for (i = 0; i < nb_globals; i++) {
+ temp_state[i] |= TS_MEM;
+ }
}
/* record arguments that die in this helper */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i];
if (arg != TCG_CALL_DUMMY_ARG) {
- if (dead_temps[arg]) {
+ if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
}
}
/* input arguments are live for preceding opcodes */
- for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
arg = args[i];
- dead_temps[arg] = 0;
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ temp_state[arg] &= ~TS_DEAD;
+ }
}
}
}
@@ -1427,8 +1431,7 @@ static void tcg_liveness_analysis(TCGContext *s)
break;
case INDEX_op_discard:
/* mark the temporary as dead */
- dead_temps[args[0]] = 1;
- mem_temps[args[0]] = 0;
+ temp_state[args[0]] = TS_DEAD;
break;
case INDEX_op_add2_i32:
@@ -1449,8 +1452,8 @@ static void tcg_liveness_analysis(TCGContext *s)
the low part. The result can be optimized to a simple
add or sub. This happens often for x86_64 guest when the
cpu mode is set to 32 bit. */
- if (dead_temps[args[1]] && !mem_temps[args[1]]) {
- if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ if (temp_state[args[1]] == TS_DEAD) {
+ if (temp_state[args[0]] == TS_DEAD) {
goto do_remove;
}
/* Replace the opcode and adjust the args in place,
@@ -1487,8 +1490,8 @@ static void tcg_liveness_analysis(TCGContext *s)
do_mul2:
nb_iargs = 2;
nb_oargs = 2;
- if (dead_temps[args[1]] && !mem_temps[args[1]]) {
- if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ if (temp_state[args[1]] == TS_DEAD) {
+ if (temp_state[args[0]] == TS_DEAD) {
/* Both parts of the operation are dead. */
goto do_remove;
}
@@ -1496,8 +1499,7 @@ static void tcg_liveness_analysis(TCGContext *s)
op->opc = opc = opc_new;
args[1] = args[2];
args[2] = args[3];
- } else if (have_opc_new2 && dead_temps[args[0]]
- && !mem_temps[args[0]]) {
+ } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
/* The low part of the operation is dead; generate the high. */
op->opc = opc = opc_new2;
args[0] = args[1];
@@ -1520,8 +1522,7 @@ static void tcg_liveness_analysis(TCGContext *s)
implies side effects */
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
for (i = 0; i < nb_oargs; i++) {
- arg = args[i];
- if (!dead_temps[arg] || mem_temps[arg]) {
+ if (temp_state[args[i]] != TS_DEAD) {
goto do_not_remove;
}
}
@@ -1532,35 +1533,35 @@ static void tcg_liveness_analysis(TCGContext *s)
/* output args are dead */
for (i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (dead_temps[arg]) {
+ if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
- if (mem_temps[arg]) {
+ if (temp_state[arg] & TS_MEM) {
arg_life |= SYNC_ARG << i;
}
- dead_temps[arg] = 1;
- mem_temps[arg] = 0;
+ temp_state[arg] = TS_DEAD;
}
/* if end of basic block, update */
if (def->flags & TCG_OPF_BB_END) {
- tcg_la_bb_end(s, dead_temps, mem_temps);
+ tcg_la_bb_end(s, temp_state);
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
/* globals should be synced to memory */
- memset(mem_temps, 1, s->nb_globals);
+ for (i = 0; i < nb_globals; i++) {
+ temp_state[i] |= TS_MEM;
+ }
}
/* record arguments that die in this opcode */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
arg = args[i];
- if (dead_temps[arg]) {
+ if (temp_state[arg] & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
}
/* input arguments are live for preceding opcodes */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
- arg = args[i];
- dead_temps[arg] = 0;
+ temp_state[args[i]] &= ~TS_DEAD;
}
}
break;
--
2.7.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v4 for-2.7 5/7] tcg: Include liveness info in the dumps
2016-08-04 16:26 [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Richard Henderson
` (3 preceding siblings ...)
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 4/7] tcg: Compress dead_temps and mem_temps into a single array Richard Henderson
@ 2016-08-04 16:26 ` Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 6/7] tcg: Require liveness analysis Richard Henderson
` (2 subsequent siblings)
7 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2016-08-04 16:26 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien, peter.maydell
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
include/qemu/log.h | 2 +-
tcg/tcg.c | 68 +++++++++++++++++++++++++++++++++++++-----------------
util/log.c | 19 ++++++++++-----
3 files changed, 61 insertions(+), 28 deletions(-)
diff --git a/include/qemu/log.h b/include/qemu/log.h
index 8bec6b4..9ab8f51 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -54,7 +54,7 @@ static inline bool qemu_loglevel_mask(int mask)
/* main logging function
*/
-void GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
+int GCC_FMT_ATTR(1, 2) qemu_log(const char *fmt, ...);
/* vfprintf-like logging function
*/
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 27bbb4d..b0a88ba 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1026,6 +1026,7 @@ void tcg_dump_ops(TCGContext *s)
const TCGOpDef *def;
const TCGArg *args;
TCGOpcode c;
+ int col = 0;
op = &s->gen_op_buf[oi];
c = op->opc;
@@ -1033,7 +1034,7 @@ void tcg_dump_ops(TCGContext *s)
args = &s->gen_opparam_buf[op->args];
if (c == INDEX_op_insn_start) {
- qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
+ col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
target_ulong a;
@@ -1042,7 +1043,7 @@ void tcg_dump_ops(TCGContext *s)
#else
a = args[i];
#endif
- qemu_log(" " TARGET_FMT_lx, a);
+ col += qemu_log(" " TARGET_FMT_lx, a);
}
} else if (c == INDEX_op_call) {
/* variable number of arguments */
@@ -1051,12 +1052,12 @@ void tcg_dump_ops(TCGContext *s)
nb_cargs = def->nb_cargs;
/* function name, flags, out args */
- qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
- tcg_find_helper(s, args[nb_oargs + nb_iargs]),
- args[nb_oargs + nb_iargs + 1], nb_oargs);
+ col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
+ tcg_find_helper(s, args[nb_oargs + nb_iargs]),
+ args[nb_oargs + nb_iargs + 1], nb_oargs);
for (i = 0; i < nb_oargs; i++) {
- qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
- args[i]));
+ col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[i]));
}
for (i = 0; i < nb_iargs; i++) {
TCGArg arg = args[nb_oargs + i];
@@ -1064,10 +1065,10 @@ void tcg_dump_ops(TCGContext *s)
if (arg != TCG_CALL_DUMMY_ARG) {
t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
}
- qemu_log(",%s", t);
+ col += qemu_log(",%s", t);
}
} else {
- qemu_log(" %s ", def->name);
+ col += qemu_log(" %s ", def->name);
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
@@ -1076,17 +1077,17 @@ void tcg_dump_ops(TCGContext *s)
k = 0;
for (i = 0; i < nb_oargs; i++) {
if (k != 0) {
- qemu_log(",");
+ col += qemu_log(",");
}
- qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
- args[k++]));
+ col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[k++]));
}
for (i = 0; i < nb_iargs; i++) {
if (k != 0) {
- qemu_log(",");
+ col += qemu_log(",");
}
- qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
- args[k++]));
+ col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[k++]));
}
switch (c) {
case INDEX_op_brcond_i32:
@@ -1098,9 +1099,9 @@ void tcg_dump_ops(TCGContext *s)
case INDEX_op_setcond_i64:
case INDEX_op_movcond_i64:
if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
- qemu_log(",%s", cond_name[args[k++]]);
+ col += qemu_log(",%s", cond_name[args[k++]]);
} else {
- qemu_log(",$0x%" TCG_PRIlx, args[k++]);
+ col += qemu_log(",$0x%" TCG_PRIlx, args[k++]);
}
i = 1;
break;
@@ -1114,12 +1115,12 @@ void tcg_dump_ops(TCGContext *s)
unsigned ix = get_mmuidx(oi);
if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
- qemu_log(",$0x%x,%u", op, ix);
+ col += qemu_log(",$0x%x,%u", op, ix);
} else {
const char *s_al, *s_op;
s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
- qemu_log(",%s%s,%u", s_al, s_op, ix);
+ col += qemu_log(",%s%s,%u", s_al, s_op, ix);
}
i = 1;
}
@@ -1134,14 +1135,39 @@ void tcg_dump_ops(TCGContext *s)
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
case INDEX_op_brcond2_i32:
- qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
+ col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
i++, k++;
break;
default:
break;
}
for (; i < nb_cargs; i++, k++) {
- qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
+ col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
+ }
+ }
+ if (op->life) {
+ unsigned life = op->life;
+
+ for (; col < 48; ++col) {
+ putc(' ', qemu_logfile);
+ }
+
+ if (life & (SYNC_ARG * 3)) {
+ qemu_log(" sync:");
+ for (i = 0; i < 2; ++i) {
+ if (life & (SYNC_ARG << i)) {
+ qemu_log(" %d", i);
+ }
+ }
+ }
+ life /= DEAD_ARG;
+ if (life) {
+ qemu_log(" dead:");
+ for (i = 0; life; ++i, life >>= 1) {
+ if (life & 1) {
+ qemu_log(" %d", i);
+ }
+ }
}
}
qemu_log("\n");
diff --git a/util/log.c b/util/log.c
index b6c75b1..9f08444 100644
--- a/util/log.c
+++ b/util/log.c
@@ -32,15 +32,22 @@ int qemu_loglevel;
static int log_append = 0;
static GArray *debug_regions;
-void qemu_log(const char *fmt, ...)
+/* Return the number of characters emitted. */
+int qemu_log(const char *fmt, ...)
{
- va_list ap;
-
- va_start(ap, fmt);
+ int ret = 0;
if (qemu_logfile) {
- vfprintf(qemu_logfile, fmt, ap);
+ va_list ap;
+ va_start(ap, fmt);
+ ret = vfprintf(qemu_logfile, fmt, ap);
+ va_end(ap);
+
+ /* Don't pass back error results. */
+ if (ret < 0) {
+ ret = 0;
+ }
}
- va_end(ap);
+ return ret;
}
static bool log_uses_own_buffers;
--
2.7.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v4 for-2.7 6/7] tcg: Require liveness analysis
2016-08-04 16:26 [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Richard Henderson
` (4 preceding siblings ...)
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 5/7] tcg: Include liveness info in the dumps Richard Henderson
@ 2016-08-04 16:26 ` Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 7/7] tcg: Lower indirect registers in a separate pass Richard Henderson
2016-08-04 16:37 ` [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Peter Maydell
7 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2016-08-04 16:26 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien, peter.maydell
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
tcg/tcg.c | 21 ---------------------
1 file changed, 21 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index b0a88ba..3c1f526 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -23,7 +23,6 @@
*/
/* define it to use liveness analysis (better code) */
-#define USE_LIVENESS_ANALYSIS
#define USE_TCG_OPTIMIZATIONS
#include "qemu/osdep.h"
@@ -1337,8 +1336,6 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
#endif
}
-#ifdef USE_LIVENESS_ANALYSIS
-
#define TS_DEAD 1
#define TS_MEM 2
@@ -1595,18 +1592,6 @@ static void tcg_liveness_analysis(TCGContext *s)
op->life = arg_life;
}
}
-#else
-/* dummy liveness analysis */
-static void tcg_liveness_analysis(TCGContext *s)
-{
- int nb_ops = s->gen_next_op_idx;
-
- s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
- memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
- s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
- memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
-}
-#endif
#ifdef CONFIG_DEBUG_TCG
static void dump_regs(TCGContext *s)
@@ -1858,7 +1843,6 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
temporary registers needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
-#ifdef USE_LIVENESS_ANALYSIS
/* ??? Liveness does not yet incorporate indirect bases. */
if (!ts->indirect_base) {
/* The liveness analysis already ensures that globals are back
@@ -1866,7 +1850,6 @@ static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
return;
}
-#endif
temp_sync(s, ts, allocated_regs, 1);
}
@@ -1891,7 +1874,6 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
for (i = 0; i < s->nb_globals; i++) {
TCGTemp *ts = &s->temps[i];
-#ifdef USE_LIVENESS_ANALYSIS
/* ??? Liveness does not yet incorporate indirect bases. */
if (!ts->indirect_base) {
tcg_debug_assert(ts->val_type != TEMP_VAL_REG
@@ -1899,7 +1881,6 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
|| ts->mem_coherent);
continue;
}
-#endif
temp_sync(s, ts, allocated_regs, 0);
}
}
@@ -1915,7 +1896,6 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
if (ts->temp_local) {
temp_save(s, ts, allocated_regs);
} else {
-#ifdef USE_LIVENESS_ANALYSIS
/* ??? Liveness does not yet incorporate indirect bases. */
if (!ts->indirect_base) {
/* The liveness analysis already ensures that temps are dead.
@@ -1923,7 +1903,6 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
continue;
}
-#endif
temp_dead(s, ts);
}
}
--
2.7.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH v4 for-2.7 7/7] tcg: Lower indirect registers in a separate pass
2016-08-04 16:26 [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Richard Henderson
` (5 preceding siblings ...)
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 6/7] tcg: Require liveness analysis Richard Henderson
@ 2016-08-04 16:26 ` Richard Henderson
2016-08-04 16:37 ` [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Peter Maydell
7 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2016-08-04 16:26 UTC (permalink / raw)
To: qemu-devel; +Cc: aurelien, peter.maydell
Rather than rely on recursion during the middle of register allocation,
lower indirect registers to loads and stores off the indirect base into
plain temps.
For an x86_64 host, with sufficient registers, this results in identical
code, modulo the actual register assignments.
For an i686 host, with insufficient registers, this means that temps can
be (temporarily) spilled to the stack in order to satisfy an allocation.
This is as opposed to the previous situation, in which performing a spill
could require allocating a register for the indirect base, which could in
turn require a spill that was not possible.
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
include/qemu/log.h | 1 +
tcg/optimize.c | 31 +-----
tcg/tcg.c | 306 +++++++++++++++++++++++++++++++++++++++++++----------
tcg/tcg.h | 4 +
util/log.c | 5 +-
5 files changed, 263 insertions(+), 84 deletions(-)
diff --git a/include/qemu/log.h b/include/qemu/log.h
index 9ab8f51..00bf37f 100644
--- a/include/qemu/log.h
+++ b/include/qemu/log.h
@@ -42,6 +42,7 @@ static inline bool qemu_log_separate(void)
#define CPU_LOG_TB_NOCHAIN (1 << 13)
#define CPU_LOG_PAGE (1 << 14)
#define LOG_TRACE (1 << 15)
+#define CPU_LOG_TB_OP_IND (1 << 16)
/* Returns true if a bit is set in the current loglevel mask
*/
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 8df7fc7..cffe89b 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -82,33 +82,6 @@ static void init_temp_info(TCGArg temp)
}
}
-static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
- TCGOpcode opc, int nargs)
-{
- int oi = s->gen_next_op_idx;
- int pi = s->gen_next_parm_idx;
- int prev = old_op->prev;
- int next = old_op - s->gen_op_buf;
- TCGOp *new_op;
-
- tcg_debug_assert(oi < OPC_BUF_SIZE);
- tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
- s->gen_next_op_idx = oi + 1;
- s->gen_next_parm_idx = pi + nargs;
-
- new_op = &s->gen_op_buf[oi];
- *new_op = (TCGOp){
- .opc = opc,
- .args = pi,
- .prev = prev,
- .next = next
- };
- s->gen_op_buf[prev].next = oi;
- old_op->prev = oi;
-
- return new_op;
-}
-
static int op_bits(TCGOpcode op)
{
const TCGOpDef *def = &tcg_op_defs[op];
@@ -1116,7 +1089,7 @@ void tcg_optimize(TCGContext *s)
uint64_t a = ((uint64_t)ah << 32) | al;
uint64_t b = ((uint64_t)bh << 32) | bl;
TCGArg rl, rh;
- TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
TCGArg *args2 = &s->gen_opparam_buf[op2->args];
if (opc == INDEX_op_add2_i32) {
@@ -1142,7 +1115,7 @@ void tcg_optimize(TCGContext *s)
uint32_t b = temps[args[3]].val;
uint64_t r = (uint64_t)a * b;
TCGArg rl, rh;
- TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+ TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32, 2);
TCGArg *args2 = &s->gen_opparam_buf[op2->args];
rl = args[0];
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 3c1f526..42417bd 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -531,8 +531,12 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
#endif
if (!base_ts->fixed_reg) {
- indirect_reg = 1;
+ /* We do not support double-indirect registers. */
+ tcg_debug_assert(!base_ts->indirect_reg);
base_ts->indirect_base = 1;
+ s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
+ ? 2 : 1);
+ indirect_reg = 1;
}
if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
@@ -1336,9 +1340,66 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
#endif
}
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
+ TCGOpcode opc, int nargs)
+{
+ int oi = s->gen_next_op_idx;
+ int pi = s->gen_next_parm_idx;
+ int prev = old_op->prev;
+ int next = old_op - s->gen_op_buf;
+ TCGOp *new_op;
+
+ tcg_debug_assert(oi < OPC_BUF_SIZE);
+ tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
+ s->gen_next_op_idx = oi + 1;
+ s->gen_next_parm_idx = pi + nargs;
+
+ new_op = &s->gen_op_buf[oi];
+ *new_op = (TCGOp){
+ .opc = opc,
+ .args = pi,
+ .prev = prev,
+ .next = next
+ };
+ s->gen_op_buf[prev].next = oi;
+ old_op->prev = oi;
+
+ return new_op;
+}
+
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
+ TCGOpcode opc, int nargs)
+{
+ int oi = s->gen_next_op_idx;
+ int pi = s->gen_next_parm_idx;
+ int prev = old_op - s->gen_op_buf;
+ int next = old_op->next;
+ TCGOp *new_op;
+
+ tcg_debug_assert(oi < OPC_BUF_SIZE);
+ tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
+ s->gen_next_op_idx = oi + 1;
+ s->gen_next_parm_idx = pi + nargs;
+
+ new_op = &s->gen_op_buf[oi];
+ *new_op = (TCGOp){
+ .opc = opc,
+ .args = pi,
+ .prev = prev,
+ .next = next
+ };
+ s->gen_op_buf[next].prev = oi;
+ old_op->next = oi;
+
+ return new_op;
+}
+
#define TS_DEAD 1
#define TS_MEM 2
+#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
+#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
+
/* liveness analysis: end of function: all temps are dead, and globals
should be in memory. */
static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
@@ -1364,13 +1425,11 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
/* Liveness analysis : update the opc_arg_life array to tell if a
given input arguments is dead. Instructions updating dead
temporaries are removed. */
-static void tcg_liveness_analysis(TCGContext *s)
+static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
{
- uint8_t *temp_state;
- int oi, oi_prev;
int nb_globals = s->nb_globals;
+ int oi, oi_prev;
- temp_state = tcg_malloc(s->nb_temps);
tcg_la_func_end(s, temp_state);
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
@@ -1593,6 +1652,165 @@ static void tcg_liveness_analysis(TCGContext *s)
}
}
+/* Liveness analysis: Convert indirect regs to direct temporaries. */
+static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
+{
+ int nb_globals = s->nb_globals;
+ int16_t *dir_temps;
+ int i, oi, oi_next;
+ bool changes = false;
+
+ dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
+ memset(dir_temps, 0, nb_globals * sizeof(int16_t));
+
+ /* Create a temporary for each indirect global. */
+ for (i = 0; i < nb_globals; ++i) {
+ TCGTemp *its = &s->temps[i];
+ if (its->indirect_reg) {
+ TCGTemp *dts = tcg_temp_alloc(s);
+ dts->type = its->type;
+ dts->base_type = its->base_type;
+ dir_temps[i] = temp_idx(s, dts);
+ }
+ }
+
+ memset(temp_state, TS_DEAD, nb_globals);
+
+ for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
+ TCGOp *op = &s->gen_op_buf[oi];
+ TCGArg *args = &s->gen_opparam_buf[op->args];
+ TCGOpcode opc = op->opc;
+ const TCGOpDef *def = &tcg_op_defs[opc];
+ TCGLifeData arg_life = op->life;
+ int nb_iargs, nb_oargs, call_flags;
+ TCGArg arg, dir;
+
+ oi_next = op->next;
+
+ if (opc == INDEX_op_call) {
+ nb_oargs = op->callo;
+ nb_iargs = op->calli;
+ call_flags = args[nb_oargs + nb_iargs + 1];
+ } else {
+ nb_iargs = def->nb_iargs;
+ nb_oargs = def->nb_oargs;
+
+ /* Set flags similar to how calls require. */
+ if (def->flags & TCG_OPF_BB_END) {
+ /* Like writing globals: save_globals */
+ call_flags = 0;
+ } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ /* Like reading globals: sync_globals */
+ call_flags = TCG_CALL_NO_WRITE_GLOBALS;
+ } else {
+ /* No effect on globals. */
+ call_flags = (TCG_CALL_NO_READ_GLOBALS |
+ TCG_CALL_NO_WRITE_GLOBALS);
+ }
+ }
+
+ /* Make sure that input arguments are available. */
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+ arg = args[i];
+ /* Note this unsigned test catches TCG_CALL_DUMMY_ARG too. */
+ if (arg < nb_globals) {
+ dir = dir_temps[arg];
+ if (dir != 0 && temp_state[arg] == TS_DEAD) {
+ TCGTemp *its = &s->temps[arg];
+ TCGOpcode lopc = (its->type == TCG_TYPE_I32
+ ? INDEX_op_ld_i32
+ : INDEX_op_ld_i64);
+ TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
+ TCGArg *largs = &s->gen_opparam_buf[lop->args];
+
+ largs[0] = dir;
+ largs[1] = temp_idx(s, its->mem_base);
+ largs[2] = its->mem_offset;
+
+ /* Loaded, but synced with memory. */
+ temp_state[arg] = TS_MEM;
+ }
+ }
+ }
+
+ /* Perform input replacement, and mark inputs that became dead.
+ No action is required except keeping temp_state up to date
+ so that we reload when needed. */
+ for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+ arg = args[i];
+ if (arg < nb_globals) {
+ dir = dir_temps[arg];
+ if (dir != 0) {
+ args[i] = dir;
+ changes = true;
+ if (IS_DEAD_ARG(i)) {
+ temp_state[arg] = TS_DEAD;
+ }
+ }
+ }
+ }
+
+ /* Liveness analysis should ensure that the following are
+ all correct, for call sites and basic block end points. */
+ if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
+ /* Nothing to do */
+ } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
+ for (i = 0; i < nb_globals; ++i) {
+ /* Liveness should see that globals are synced back,
+ that is, either TS_DEAD or TS_MEM. */
+ tcg_debug_assert(dir_temps[i] == 0
+ || temp_state[i] != 0);
+ }
+ } else {
+ for (i = 0; i < nb_globals; ++i) {
+ /* Liveness should see that globals are saved back,
+ that is, TS_DEAD, waiting to be reloaded. */
+ tcg_debug_assert(dir_temps[i] == 0
+ || temp_state[i] == TS_DEAD);
+ }
+ }
+
+ /* Outputs become available. */
+ for (i = 0; i < nb_oargs; i++) {
+ arg = args[i];
+ if (arg >= nb_globals) {
+ continue;
+ }
+ dir = dir_temps[arg];
+ if (dir == 0) {
+ continue;
+ }
+ args[i] = dir;
+ changes = true;
+
+ /* The output is now live and modified. */
+ temp_state[arg] = 0;
+
+ /* Sync outputs upon their last write. */
+ if (NEED_SYNC_ARG(i)) {
+ TCGTemp *its = &s->temps[arg];
+ TCGOpcode sopc = (its->type == TCG_TYPE_I32
+ ? INDEX_op_st_i32
+ : INDEX_op_st_i64);
+ TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
+ TCGArg *sargs = &s->gen_opparam_buf[sop->args];
+
+ sargs[0] = dir;
+ sargs[1] = temp_idx(s, its->mem_base);
+ sargs[2] = its->mem_offset;
+
+ temp_state[arg] = TS_MEM;
+ }
+ /* Drop outputs that are dead. */
+ if (IS_DEAD_ARG(i)) {
+ temp_state[arg] = TS_DEAD;
+ }
+ }
+ }
+
+ return changes;
+}
+
#ifdef CONFIG_DEBUG_TCG
static void dump_regs(TCGContext *s)
{
@@ -1723,14 +1941,6 @@ static void temp_sync(TCGContext *s, TCGTemp *ts,
if (!ts->mem_allocated) {
temp_allocate_frame(s, temp_idx(s, ts));
}
- if (ts->indirect_reg) {
- if (ts->val_type == TEMP_VAL_REG) {
- tcg_regset_set_reg(allocated_regs, ts->reg);
- }
- temp_load(s, ts->mem_base,
- tcg_target_available_regs[TCG_TYPE_PTR],
- allocated_regs);
- }
switch (ts->val_type) {
case TEMP_VAL_CONST:
/* If we're going to free the temp immediately, then we won't
@@ -1821,12 +2031,6 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
break;
case TEMP_VAL_MEM:
reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
- if (ts->indirect_reg) {
- tcg_regset_set_reg(allocated_regs, reg);
- temp_load(s, ts->mem_base,
- tcg_target_available_regs[TCG_TYPE_PTR],
- allocated_regs);
- }
tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
ts->mem_coherent = 1;
break;
@@ -1843,14 +2047,9 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
temporary registers needs to be allocated to store a constant. */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
- /* ??? Liveness does not yet incorporate indirect bases. */
- if (!ts->indirect_base) {
- /* The liveness analysis already ensures that globals are back
- in memory. Keep an tcg_debug_assert for safety. */
- tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
- return;
- }
- temp_sync(s, ts, allocated_regs, 1);
+ /* The liveness analysis already ensures that globals are back
+ in memory. Keep a tcg_debug_assert for safety. */
+ tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}
/* save globals to their canonical location and assume they can be
@@ -1874,14 +2073,9 @@ static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
for (i = 0; i < s->nb_globals; i++) {
TCGTemp *ts = &s->temps[i];
- /* ??? Liveness does not yet incorporate indirect bases. */
- if (!ts->indirect_base) {
- tcg_debug_assert(ts->val_type != TEMP_VAL_REG
- || ts->fixed_reg
- || ts->mem_coherent);
- continue;
- }
- temp_sync(s, ts, allocated_regs, 0);
+ tcg_debug_assert(ts->val_type != TEMP_VAL_REG
+ || ts->fixed_reg
+ || ts->mem_coherent);
}
}
@@ -1896,23 +2090,15 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
if (ts->temp_local) {
temp_save(s, ts, allocated_regs);
} else {
- /* ??? Liveness does not yet incorporate indirect bases. */
- if (!ts->indirect_base) {
- /* The liveness analysis already ensures that temps are dead.
- Keep an tcg_debug_assert for safety. */
- tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
- continue;
- }
- temp_dead(s, ts);
+ /* The liveness analysis already ensures that temps are dead.
+ Keep a tcg_debug_assert for safety. */
+ tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
}
}
save_globals(s, allocated_regs);
}
-#define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
-#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
-
static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
TCGLifeData arg_life)
{
@@ -1975,12 +2161,6 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
if (!ots->mem_allocated) {
temp_allocate_frame(s, args[0]);
}
- if (ots->indirect_reg) {
- tcg_regset_set_reg(allocated_regs, ts->reg);
- temp_load(s, ots->mem_base,
- tcg_target_available_regs[TCG_TYPE_PTR],
- allocated_regs);
- }
tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
if (IS_DEAD_ARG(1)) {
temp_dead(s, ts);
@@ -2385,7 +2565,27 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
s->la_time -= profile_getclock();
#endif
- tcg_liveness_analysis(s);
+ {
+ uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);
+
+ liveness_pass_1(s, temp_state);
+
+ if (s->nb_indirects > 0) {
+#ifdef DEBUG_DISAS
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
+ && qemu_log_in_addr_range(tb->pc))) {
+ qemu_log("OP before indirect lowering:\n");
+ tcg_dump_ops(s);
+ qemu_log("\n");
+ }
+#endif
+ /* Replace indirect temps with direct temps. */
+ if (liveness_pass_2(s, temp_state)) {
+ /* If changes were made, re-run liveness. */
+ liveness_pass_1(s, temp_state);
+ }
+ }
+ }
#ifdef CONFIG_PROFILER
s->la_time += profile_getclock();
diff --git a/tcg/tcg.h b/tcg/tcg.h
index ebf6867..1bcabca 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -617,6 +617,7 @@ struct TCGContext {
int nb_labels;
int nb_globals;
int nb_temps;
+ int nb_indirects;
/* goto_tb support */
tcg_insn_unit *code_buf;
@@ -898,6 +899,9 @@ void tcg_gen_callN(TCGContext *s, void *func,
TCGArg ret, int nargs, TCGArg *args);
void tcg_op_remove(TCGContext *s, TCGOp *op);
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
+
void tcg_optimize(TCGContext *s);
/* only used for debugging purposes */
diff --git a/util/log.c b/util/log.c
index 9f08444..54b54e8 100644
--- a/util/log.c
+++ b/util/log.c
@@ -247,8 +247,9 @@ const QEMULogItem qemu_log_items[] = {
{ CPU_LOG_TB_OP, "op",
"show micro ops for each compiled TB" },
{ CPU_LOG_TB_OP_OPT, "op_opt",
- "show micro ops (x86 only: before eflags optimization) and\n"
- "after liveness analysis" },
+ "show micro ops after optimization" },
+ { CPU_LOG_TB_OP_IND, "op_ind",
+ "show micro ops before indirect lowering" },
{ CPU_LOG_INT, "int",
"show interrupts/exceptions in short format" },
{ CPU_LOG_EXEC, "exec",
--
2.7.4
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash
2016-08-04 16:26 [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Richard Henderson
` (6 preceding siblings ...)
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 7/7] tcg: Lower indirect registers in a separate pass Richard Henderson
@ 2016-08-04 16:37 ` Peter Maydell
2016-08-04 16:55 ` Richard Henderson
7 siblings, 1 reply; 11+ messages in thread
From: Peter Maydell @ 2016-08-04 16:37 UTC (permalink / raw)
To: Richard Henderson; +Cc: QEMU Developers, Aurelien Jarno
On 4 August 2016 at 17:26, Richard Henderson <rth@twiddle.net> wrote:
> This is a revision of my "third" attempt, tweaked a bit for Aurelien's
> review. I've sort of lost track of where we are with the release process,
> so I'll understand if we've gone too far now.
> include/exec/gen-icount.h | 2 +-
> include/qemu/log.h | 3 +-
> tcg/optimize.c | 37 +--
> tcg/tcg-op.c | 2 +-
> tcg/tcg.c | 588 ++++++++++++++++++++++++++++++----------------
> tcg/tcg.h | 52 ++--
> util/log.c | 24 +-
> 7 files changed, 441 insertions(+), 267 deletions(-)
We're probably going to tag rc2 tomorrow or maybe Monday, with a theoretical
final release date of the 16th (about a fortnight or so). So this
is a pretty big diffstat to be going in, given it's in generic code
rather than anything specific to only (say) sparc targets or i386
hosts. On the other hand it is a bugfix...
thanks
-- PMM
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash
2016-08-04 16:37 ` [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Peter Maydell
@ 2016-08-04 16:55 ` Richard Henderson
0 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2016-08-04 16:55 UTC (permalink / raw)
To: Peter Maydell; +Cc: QEMU Developers, Aurelien Jarno
On 08/04/2016 10:07 PM, Peter Maydell wrote:
> On 4 August 2016 at 17:26, Richard Henderson <rth@twiddle.net> wrote:
>> This is a revision of my "third" attempt, tweaked a bit for Aurelien's
>> review. I've sort of lost track of where we are with the release process,
>> so I'll understand if we've gone too far now.
>
>> include/exec/gen-icount.h | 2 +-
>> include/qemu/log.h | 3 +-
>> tcg/optimize.c | 37 +--
>> tcg/tcg-op.c | 2 +-
>> tcg/tcg.c | 588 ++++++++++++++++++++++++++++++----------------
>> tcg/tcg.h | 52 ++--
>> util/log.c | 24 +-
>> 7 files changed, 441 insertions(+), 267 deletions(-)
>
> We're probably going to tag rc2 tomorrow or maybe Monday, with a theoretical
> final release date of the 16th (about a fortnight or so). So this
> is a pretty big diffstat to be going in, given it's in generic code
> rather than anything specific to only (say) sparc targets or i386
> hosts. On the other hand it is a bugfix...
Yeah. Review delay and travel delay have conspired against us in this case.
r~
^ permalink raw reply [flat|nested] 11+ messages in thread