qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Filip Navara <filip.navara@gmail.com>
To: qemu-devel@nongnu.org
Cc: Paul Brook <paul@codesourcery.com>
Subject: Re: [Qemu-devel] [PATCH] RFC: TCG constant propagation.
Date: Thu, 23 Jul 2009 11:08:47 +0200	[thread overview]
Message-ID: <5b31733c0907230208o7374ad68uf6e4438a5c5a41d5@mail.gmail.com> (raw)
In-Reply-To: <E1MTGSg-0007wd-QX@lists.gnu.org>

[-- Attachment #1: Type: text/plain, Size: 988 bytes --]

On Tue, Jul 21, 2009 at 4:38 PM, Filip Navara<filip.navara@gmail.com> wrote:
> Add support for constant propagation to TCG. This has to be paired with the liveness
> analysis to remove the dead code. Not all possible operations are covered, but the
> most common ones are. This improves the code generation for several ARM instructions,
> like MVN (immediate), and it may help other targets as well.
> ---
>  tcg/tcg.c |  164 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 164 insertions(+), 0 deletions(-)

Attached is an updated version of the patch, which adds profiling code for
the TCG pass and hopefully fixes it for 64-bit TCG targets.

Also, it looks like the actual code size is worse for the TCG ARM target
with this optimization. To mitigate that, I propose the other attached
patch, which uses the barrel shifter for more optimal generation of the
"movi" operation. I can't test it, so any help is welcome.

Best regards,
Filip Navara

[-- Attachment #2: 0001-TCG-constant-propagation.patch --]
[-- Type: application/octet-stream, Size: 8340 bytes --]

From 403c7d19df34fb25a8b57d90000697a83488cd48 Mon Sep 17 00:00:00 2001
From: Filip Navara <filip.navara@gmail.com>
Date: Tue, 21 Jul 2009 16:31:40 +0200
Subject: [PATCH 1/2] TCG constant propagation.

Add support for constant propagation to TCG. This has to be paired with the liveness
analysis to remove the dead code. Not all possible operations are covered, but the
most common ones are. This improves the code generation for several ARM instructions,
like MVN (immediate), and it may help other targets as well.

v1 -> v2:
Added profiling code and hopefully fixed for 64-bit TCG targets.
---
 tcg/tcg.c |  202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/tcg.h |    1 +
 2 files changed, 203 insertions(+), 0 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 4cb5934..e23e303 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1025,7 +1025,199 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
 #endif
         tdefs++;
     }
+}
+
+/*
+ * Constant propagation over the generated TCG opcode stream.  Tracks which
+ * temps hold a known constant and rewrites "not" and the basic two-input ALU
+ * ops into movi once all inputs are constant.  Folding a three-argument op
+ * frees one slot in gen_opparam_buf, so the arguments that follow are moved
+ * down and gen_opparam_ptr is adjusted.
+ */
+static void tcg_const_analysis(TCGContext *s)
+{
+    int nb_cargs, nb_iargs, nb_oargs, dest, src, src2, del_args, i;
+    TCGArg *args;
+    uint16_t op, movi_op, *opc_ptr;
+    const TCGOpDef *def;
+    uint8_t *const_temps;
+    tcg_target_ulong *temp_values, mask, dest_val, src_val, src2_val;
+
+    const_temps = tcg_malloc(s->nb_temps);
+    memset(const_temps, 0, s->nb_temps);
+    /* Values are tcg_target_ulong wide, so size the array by its element. */
+    temp_values = tcg_malloc(s->nb_temps * sizeof(*temp_values));
+    opc_ptr = gen_opc_buf;
+    args = gen_opparam_buf;
+    while (opc_ptr < gen_opc_ptr) {
+        op = *opc_ptr;
+        def = &tcg_op_defs[op];
+        nb_oargs = def->nb_oargs;
+        nb_iargs = def->nb_iargs;
+        nb_cargs = def->nb_cargs;
+        del_args = 0;
+        mask = ~((tcg_target_ulong)0);
+#if TCG_TARGET_REG_BITS == 64
+        movi_op = INDEX_op_movi_i64;
+#else
+        movi_op = INDEX_op_movi_i32;
+#endif
+        switch(op) {
+        case INDEX_op_movi_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_movi_i64:
+#endif
+            const_temps[args[0]] = 1;
+            temp_values[args[0]] = args[1];
+            break;
+        case INDEX_op_mov_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_mov_i64:
+#endif
+            const_temps[args[0]] = const_temps[args[1]];
+            temp_values[args[0]] = temp_values[args[1]];
+            break;
+#if defined(TCG_TARGET_HAS_not_i32) || defined(TCG_TARGET_HAS_not_i64)
+#ifdef TCG_TARGET_HAS_not_i32
+        case INDEX_op_not_i32:
+#endif
+#if TCG_TARGET_REG_BITS == 64
+            mask = 0xffffffff;
+            movi_op = INDEX_op_movi_i32;
+#ifdef TCG_TARGET_HAS_not_i64
+        case INDEX_op_not_i64:
+#endif
+#endif
+            dest = args[0];
+            src = args[1];
+            const_temps[dest] = const_temps[src];
+            if (const_temps[dest]) {
+                dest_val = ~temp_values[src];
+                *opc_ptr = movi_op;
+                args[1] = temp_values[dest] = dest_val & mask;
+            }
+            break;
+#endif
+        case INDEX_op_add_i32:
+        case INDEX_op_sub_i32:
+        case INDEX_op_mul_i32:
+        case INDEX_op_and_i32:
+        case INDEX_op_or_i32:
+        case INDEX_op_xor_i32:
+        case INDEX_op_shl_i32:
+        case INDEX_op_shr_i32:
+#if TCG_TARGET_REG_BITS == 64
+            mask = 0xffffffff;
+            movi_op = INDEX_op_movi_i32;
+        case INDEX_op_add_i64:
+        case INDEX_op_sub_i64:
+        case INDEX_op_mul_i64:
+        case INDEX_op_and_i64:
+        case INDEX_op_or_i64:
+        case INDEX_op_xor_i64:
+        case INDEX_op_shl_i64:
+        case INDEX_op_shr_i64:
+#endif
+            dest = args[0];
+            src = args[1];
+            src2 = args[2];
+            const_temps[dest] = const_temps[src] && const_temps[src2];
+            if (const_temps[dest]) {
+                /* Mask inputs too so 32-bit folds ignore stale high bits. */
+                src_val = temp_values[src] & mask;
+                src2_val = temp_values[src2] & mask;
+                switch (op) {
+                case INDEX_op_add_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_add_i64:
+#endif
+                    dest_val = src_val + src2_val;
+                    break;
+                case INDEX_op_sub_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_sub_i64:
+#endif
+                    dest_val = src_val - src2_val;
+                    break;
+                case INDEX_op_mul_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_mul_i64:
+#endif
+                    dest_val = src_val * src2_val;
+                    break;
+                case INDEX_op_and_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_and_i64:
+#endif
+                    dest_val = src_val & src2_val;
+                    break;
+                case INDEX_op_or_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_or_i64:
+#endif
+                    dest_val = src_val | src2_val;
+                    break;
+                case INDEX_op_xor_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_xor_i64:
+#endif
+                    dest_val = src_val ^ src2_val;
+                    break;
+                case INDEX_op_shl_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_shl_i64:
+#endif
+                    dest_val = src_val << (src2_val & (TCG_TARGET_REG_BITS - 1));
+                    break;
+                case INDEX_op_shr_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_shr_i64:
+#endif
+                    dest_val = src_val >> (src2_val & (TCG_TARGET_REG_BITS - 1));
+                    break;
+                default:
+                    tcg_abort();
+                    return;
+                }
+                *opc_ptr = movi_op;
+                args[1] = temp_values[dest] = dest_val & mask;
+                del_args = 1;
+            }
+            break;
+        case INDEX_op_call:
+            nb_oargs = args[0] >> 16;
+            nb_iargs = args[0] & 0xffff;
+            args++;
+            /* Helpers may write globals, so nothing stays known across a call. */
+            memset(const_temps, 0, s->nb_temps);
+            break;
+        case INDEX_op_nopn:
+            /* variable number of arguments */
+            nb_cargs = args[0];
+            break;
+        case INDEX_op_set_label:
+            memset(const_temps, 0, s->nb_temps);
+            break;
+        default:
+            if (def->flags & TCG_OPF_BB_END) {
+                memset(const_temps, 0, s->nb_temps);
+            } else {
+                for (i = 0; i < nb_oargs; i++) {
+                    const_temps[args[i]] = 0;
+                }
+            }
+            break;
+        }
+        opc_ptr++;
+        args += nb_iargs + nb_oargs + nb_cargs - del_args;
+        if (del_args > 0) {
+            gen_opparam_ptr -= del_args;
+            memmove(args, args + del_args, (gen_opparam_ptr - args) * sizeof(*args));
+        }
+    }
 
+    if (args != gen_opparam_ptr)
+        tcg_abort();
 }
 
 #ifdef USE_LIVENESS_ANALYSIS
@@ -1896,6 +2088,14 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
 #endif
 
 #ifdef CONFIG_PROFILER
+    s->const_time -= profile_getclock();
+#endif
+    tcg_const_analysis(s);
+#ifdef CONFIG_PROFILER
+    s->const_time += profile_getclock();
+#endif
+
+#ifdef CONFIG_PROFILER
     s->la_time -= profile_getclock();
 #endif
     tcg_liveness_analysis(s);
@@ -2068,6 +2268,8 @@ void tcg_dump_info(FILE *f,
                 (double)s->interm_time / tot * 100.0);
     cpu_fprintf(f, "  gen_code time     %0.1f%%\n", 
                 (double)s->code_time / tot * 100.0);
+    cpu_fprintf(f, "const/code time     %0.1f%%\n", 
+                (double)s->const_time / (s->code_time ? s->code_time : 1) * 100.0);
     cpu_fprintf(f, "liveness/code time  %0.1f%%\n", 
                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
diff --git a/tcg/tcg.h b/tcg/tcg.h
index ad0bd14..f8322f2 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -288,6 +288,7 @@ struct TCGContext {
     int64_t code_out_len;
     int64_t interm_time;
     int64_t code_time;
+    int64_t const_time;
     int64_t la_time;
     int64_t restore_count;
     int64_t restore_time;
-- 
1.6.3.2.1299.gee46c


[-- Attachment #3: 0002-Use-ARM-barrel-shifter-for-more-optimal-generation-o.patch --]
[-- Type: application/octet-stream, Size: 1559 bytes --]

From 203b7284477109415989d44b71dbd149d05caadf Mon Sep 17 00:00:00 2001
From: Filip Navara <filip.navara@gmail.com>
Date: Thu, 23 Jul 2009 11:01:32 +0200
Subject: [PATCH 2/2] Use ARM barrel shifter for more optimal generation of MVN, MOV instructions for the MOVI TCG operation.

---
 tcg/arm/tcg-target.c |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 7ef2b89..0c59984 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -334,6 +334,7 @@ static inline void tcg_out_movi32(TCGContext *s,
                 int cond, int rd, int32_t arg)
 {
     int offset = (uint32_t) arg - ((uint32_t) s->code_ptr + 8);
+    int shift;
 
     /* TODO: This is very suboptimal, we can easily have a constant
      * pool somewhere after all the instructions.  */
@@ -346,6 +347,17 @@ static inline void tcg_out_movi32(TCGContext *s,
                 tcg_out_dat_imm(s, cond, ARITH_ADD, rd, 15, offset) :
                 tcg_out_dat_imm(s, cond, ARITH_SUB, rd, 15, -offset);
 
+    if (arg < 0x100)
+        tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, arg & 0xff);
+
+    shift = ctz32(arg) & ~1;
+    if (!((arg >> shift) & ~0xff))
+        return tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, (arg >> shift) + (shift << 7));
+
+    shift = ctz32(~arg) & ~1;
+    if (!((~arg >> shift) & ~0xff))
+        return tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg >> shift) + (shift << 7));
+
 #ifdef __ARM_ARCH_7A__
     /* use movw/movt */
     /* movw */
-- 
1.6.3.2.1299.gee46c


  reply	other threads:[~2009-07-23  9:08 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-07-21 14:37 [Qemu-devel] [PATCH] RFC: TCG constant propagation Filip Navara
2009-07-23  9:08 ` Filip Navara [this message]
2009-07-23  9:25   ` Laurent Desnogues
2009-07-23  9:30     ` Paul Brook
2009-07-23  9:49     ` Filip Navara
2009-07-23 20:10   ` Stuart Brady
2009-07-23 20:28     ` Filip Navara
2009-07-23 22:02   ` Daniel Jacobowitz
2009-08-05  8:13 ` Pablo Virolainen
2009-08-05  8:48   ` Filip Navara

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5b31733c0907230208o7374ad68uf6e4438a5c5a41d5@mail.gmail.com \
    --to=filip.navara@gmail.com \
    --cc=paul@codesourcery.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).