All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: aurelien@aurel32.net, peter.maydell@linaro.org
Subject: [Qemu-devel] [PATCH v4 for-2.7 1/7] tcg: Compress liveness data to 16 bits
Date: Thu,  4 Aug 2016 21:56:41 +0530	[thread overview]
Message-ID: <1470328007-7909-2-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1470328007-7909-1-git-send-email-rth@twiddle.net>

This reduces both memory usage and per-insn cacheline usage
during code generation.

Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/tcg.c | 58 ++++++++++++++++++++++------------------------------------
 tcg/tcg.h | 16 ++++++++++------
 2 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0c46c43..4aa1933 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1341,7 +1341,7 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
     }
 }
 
-/* Liveness analysis : update the opc_dead_args array to tell if a
+/* Liveness analysis : update the opc_arg_life array to tell if a
    given input arguments is dead. Instructions updating dead
    temporaries are removed. */
 static void tcg_liveness_analysis(TCGContext *s)
@@ -1350,9 +1350,8 @@ static void tcg_liveness_analysis(TCGContext *s)
     int oi, oi_prev, nb_ops;
 
     nb_ops = s->gen_next_op_idx;
-    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
-    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
-    
+    s->op_arg_life = tcg_malloc(nb_ops * sizeof(TCGLifeData));
+
     dead_temps = tcg_malloc(s->nb_temps);
     mem_temps = tcg_malloc(s->nb_temps);
     tcg_la_func_end(s, dead_temps, mem_temps);
@@ -1361,8 +1360,7 @@ static void tcg_liveness_analysis(TCGContext *s)
         int i, nb_iargs, nb_oargs;
         TCGOpcode opc_new, opc_new2;
         bool have_opc_new2;
-        uint16_t dead_args;
-        uint8_t sync_args;
+        TCGLifeData arg_life = 0;
         TCGArg arg;
 
         TCGOp * const op = &s->gen_op_buf[oi];
@@ -1394,15 +1392,13 @@ static void tcg_liveness_analysis(TCGContext *s)
                 do_not_remove_call:
 
                     /* output args are dead */
-                    dead_args = 0;
-                    sync_args = 0;
                     for (i = 0; i < nb_oargs; i++) {
                         arg = args[i];
                         if (dead_temps[arg]) {
-                            dead_args |= (1 << i);
+                            arg_life |= DEAD_ARG << i;
                         }
                         if (mem_temps[arg]) {
-                            sync_args |= (1 << i);
+                            arg_life |= SYNC_ARG << i;
                         }
                         dead_temps[arg] = 1;
                         mem_temps[arg] = 0;
@@ -1423,7 +1419,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                         arg = args[i];
                         if (arg != TCG_CALL_DUMMY_ARG) {
                             if (dead_temps[arg]) {
-                                dead_args |= (1 << i);
+                                arg_life |= DEAD_ARG << i;
                             }
                         }
                     }
@@ -1432,8 +1428,6 @@ static void tcg_liveness_analysis(TCGContext *s)
                         arg = args[i];
                         dead_temps[arg] = 0;
                     }
-                    s->op_dead_args[oi] = dead_args;
-                    s->op_sync_args[oi] = sync_args;
                 }
             }
             break;
@@ -1544,15 +1538,13 @@ static void tcg_liveness_analysis(TCGContext *s)
             } else {
             do_not_remove:
                 /* output args are dead */
-                dead_args = 0;
-                sync_args = 0;
                 for (i = 0; i < nb_oargs; i++) {
                     arg = args[i];
                     if (dead_temps[arg]) {
-                        dead_args |= (1 << i);
+                        arg_life |= DEAD_ARG << i;
                     }
                     if (mem_temps[arg]) {
-                        sync_args |= (1 << i);
+                        arg_life |= SYNC_ARG << i;
                     }
                     dead_temps[arg] = 1;
                     mem_temps[arg] = 0;
@@ -1570,7 +1562,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                     arg = args[i];
                     if (dead_temps[arg]) {
-                        dead_args |= (1 << i);
+                        arg_life |= DEAD_ARG << i;
                     }
                 }
                 /* input arguments are live for preceding opcodes */
@@ -1578,11 +1570,10 @@ static void tcg_liveness_analysis(TCGContext *s)
                     arg = args[i];
                     dead_temps[arg] = 0;
                 }
-                s->op_dead_args[oi] = dead_args;
-                s->op_sync_args[oi] = sync_args;
             }
             break;
         }
+        s->op_arg_life[oi] = arg_life;
     }
 }
 #else
@@ -1921,11 +1912,11 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
     save_globals(s, allocated_regs);
 }
 
-#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
-#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
+#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
+#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
 
 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
-                               uint16_t dead_args, uint8_t sync_args)
+                               TCGLifeData arg_life)
 {
     TCGTemp *ots;
     tcg_target_ulong val;
@@ -1954,8 +1945,7 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
 }
 
 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
-                              const TCGArg *args, uint16_t dead_args,
-                              uint8_t sync_args)
+                              const TCGArg *args, TCGLifeData arg_life)
 {
     TCGRegSet allocated_regs;
     TCGTemp *ts, *ots;
@@ -2040,8 +2030,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
 
 static void tcg_reg_alloc_op(TCGContext *s, 
                              const TCGOpDef *def, TCGOpcode opc,
-                             const TCGArg *args, uint16_t dead_args,
-                             uint8_t sync_args)
+                             const TCGArg *args, TCGLifeData arg_life)
 {
     TCGRegSet allocated_regs;
     int i, k, nb_iargs, nb_oargs;
@@ -2206,8 +2195,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
 #endif
 
 static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
-                               const TCGArg * const args, uint16_t dead_args,
-                               uint8_t sync_args)
+                               const TCGArg * const args, TCGLifeData arg_life)
 {
     int flags, nb_regs, i;
     TCGReg reg;
@@ -2427,8 +2415,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
         TCGArg * const args = &s->gen_opparam_buf[op->args];
         TCGOpcode opc = op->opc;
         const TCGOpDef *def = &tcg_op_defs[opc];
-        uint16_t dead_args = s->op_dead_args[oi];
-        uint8_t sync_args = s->op_sync_args[oi];
+        TCGLifeData arg_life = s->op_arg_life[oi];
 
         oi_next = op->next;
 #ifdef CONFIG_PROFILER
@@ -2438,11 +2425,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
         switch (opc) {
         case INDEX_op_mov_i32:
         case INDEX_op_mov_i64:
-            tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
+            tcg_reg_alloc_mov(s, def, args, arg_life);
             break;
         case INDEX_op_movi_i32:
         case INDEX_op_movi_i64:
-            tcg_reg_alloc_movi(s, args, dead_args, sync_args);
+            tcg_reg_alloc_movi(s, args, arg_life);
             break;
         case INDEX_op_insn_start:
             if (num_insns >= 0) {
@@ -2467,8 +2454,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
             tcg_out_label(s, arg_label(args[0]), s->code_ptr);
             break;
         case INDEX_op_call:
-            tcg_reg_alloc_call(s, op->callo, op->calli, args,
-                               dead_args, sync_args);
+            tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life);
             break;
         default:
             /* Sanity check that we've not introduced any unhandled opcodes. */
@@ -2478,7 +2464,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
             /* Note: in order to speed up the code, it would be much
                faster to have specialized register allocator functions for
                some common argument patterns */
-            tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
+            tcg_reg_alloc_op(s, def, opc, args, arg_life);
             break;
         }
 #ifdef CONFIG_DEBUG_TCG
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6046dcd..7c0a138 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -575,6 +575,14 @@ typedef struct TCGTempSet {
     unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
 } TCGTempSet;
 
+/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
+   this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
+   There are never more than 2 outputs, which means that we can store all
+   dead + sync data within 16 bits.  */
+#define DEAD_ARG  4
+#define SYNC_ARG  1
+typedef uint16_t TCGLifeData;
+
 typedef struct TCGOp {
     TCGOpcode opc   : 8;
 
@@ -608,12 +616,8 @@ struct TCGContext {
     uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */
 
     /* liveness analysis */
-    uint16_t *op_dead_args; /* for each operation, each bit tells if the
-                               corresponding argument is dead */
-    uint8_t *op_sync_args;  /* for each operation, each bit tells if the
-                               corresponding output argument needs to be
-                               sync to memory. */
-    
+    TCGLifeData *op_arg_life;
+
     TCGRegSet reserved_regs;
     intptr_t current_frame_offset;
     intptr_t frame_start;
-- 
2.7.4

  reply	other threads:[~2016-08-04 16:28 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-04 16:26 [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Richard Henderson
2016-08-04 16:26 ` Richard Henderson [this message]
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 2/7] tcg: Reorg TCGOp chaining Richard Henderson
2016-08-05 11:58   ` Aurelien Jarno
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 3/7] tcg: Fold life data into TCGOp Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 4/7] tcg: Compress dead_temps and mem_temps into a single array Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 5/7] tcg: Include liveness info in the dumps Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 6/7] tcg: Require liveness analysis Richard Henderson
2016-08-04 16:26 ` [Qemu-devel] [PATCH v4 for-2.7 7/7] tcg: Lower indirect registers in a separate pass Richard Henderson
2016-08-04 16:37 ` [Qemu-devel] [PATCH v4 for-2.7 0/7] Fixing i686 host / sparc64 guest crash Peter Maydell
2016-08-04 16:55   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1470328007-7909-2-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=aurelien@aurel32.net \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.