[Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate
@ 2017-06-09  5:37 Richard Henderson
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 1/7] util: add cacheinfo Richard Henderson
                   ` (8 more replies)
  0 siblings, 9 replies; 13+ messages in thread
From: Richard Henderson @ 2017-06-09  5:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: cota

This is a follow-up to Emilio's patch set.

My primary changes to Emilio's patches are to the first patch, in
merging the existing implementations from tcg/ppc/tcg-target.inc.c
into util/cacheinfo.c.

Then I have a few follow-up patches to take advantage of the new TB
placement for arm platforms.  I've had a look at the asm output for
ppc64 and s390x, and don't see anything obvious that can be improved.

Changes since v4:
  * The first patch has been reorganized a bit for aarch64 and ppc64.
    Re-tested on win32, for which there was a Werror.
    Incorporated feedback from Emilio re MacOS.
  * Fixed the short description for the tcg/arm patches.


r~


Emilio G. Cota (2):
  util: add cacheinfo
  tcg: allocate TB structs before the corresponding translated code

Richard Henderson (5):
  tcg/aarch64: Use ADR in tcg_out_movi
  tcg/arm: Use indirect branch for goto_tb
  tcg/arm: Remove limit on code buffer size
  tcg/arm: Try pc-relative addresses for movi
  tcg/arm: Use ldr (literal) for goto_tb

 include/exec/exec-all.h      |   5 +-
 include/exec/tb-context.h    |   3 +-
 include/qemu/osdep.h         |   3 +
 tcg/aarch64/tcg-target.inc.c |   7 +-
 tcg/arm/tcg-target.inc.c     |  82 +++++++++++--------
 tcg/ppc/tcg-target.inc.c     |  71 +----------------
 tcg/tcg.c                    |  20 +++++
 tcg/tcg.h                    |   2 +-
 translate-all.c              |  41 ++++++----
 util/Makefile.objs           |   1 +
 util/cacheinfo.c             | 185 +++++++++++++++++++++++++++++++++++++++++++
 11 files changed, 293 insertions(+), 127 deletions(-)
 create mode 100644 util/cacheinfo.c

-- 
2.9.4

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH v5 1/7] util: add cacheinfo
  2017-06-09  5:37 [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
@ 2017-06-09  5:37 ` Richard Henderson
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 2/7] tcg: allocate TB structs before the corresponding translated code Richard Henderson
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2017-06-09  5:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: cota

From: "Emilio G. Cota" <cota@braap.org>

Add helpers to gather cache info from the host at init-time.

For now, only export the host's I/D cache line sizes, which we
will use to improve cache locality to avoid false sharing.

Suggested-by: Richard Henderson <rth@twiddle.net>
Suggested-by: Geert Martin Ijewski <gm.ijewski@web.de>
Tested-by:    Geert Martin Ijewski <gm.ijewski@web.de>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1496794624-4083-1-git-send-email-cota@braap.org>
[rth: Move all implementations from tcg/ppc/]
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 include/qemu/osdep.h     |   3 +
 tcg/ppc/tcg-target.inc.c |  71 +-----------------
 util/Makefile.objs       |   1 +
 util/cacheinfo.c         | 185 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 191 insertions(+), 69 deletions(-)
 create mode 100644 util/cacheinfo.c

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 1c9f5e2..ee43521 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -470,4 +470,7 @@ char *qemu_get_pid_name(pid_t pid);
  */
 pid_t qemu_fork(Error **errp);
 
+extern int qemu_icache_linesize;
+extern int qemu_dcache_linesize;
+
 #endif
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 8d50f18..1f690df 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -2820,14 +2820,11 @@ void tcg_register_jit(void *buf, size_t buf_size)
 }
 #endif /* __ELF__ */
 
-static size_t dcache_bsize = 16;
-static size_t icache_bsize = 16;
-
 void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
     uintptr_t p, start1, stop1;
-    size_t dsize = dcache_bsize;
-    size_t isize = icache_bsize;
+    size_t dsize = qemu_dcache_linesize;
+    size_t isize = qemu_icache_linesize;
 
     start1 = start & ~(dsize - 1);
     stop1 = (stop + dsize - 1) & ~(dsize - 1);
@@ -2844,67 +2841,3 @@ void flush_icache_range(uintptr_t start, uintptr_t stop)
     asm volatile ("sync" : : : "memory");
     asm volatile ("isync" : : : "memory");
 }
-
-#if defined _AIX
-#include <sys/systemcfg.h>
-
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-    icache_bsize = _system_configuration.icache_line;
-    dcache_bsize = _system_configuration.dcache_line;
-}
-
-#elif defined __linux__
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-    unsigned long dsize = qemu_getauxval(AT_DCACHEBSIZE);
-    unsigned long isize = qemu_getauxval(AT_ICACHEBSIZE);
-
-    if (dsize == 0 || isize == 0) {
-        if (dsize == 0) {
-            fprintf(stderr, "getauxval AT_DCACHEBSIZE failed\n");
-        }
-        if (isize == 0) {
-            fprintf(stderr, "getauxval AT_ICACHEBSIZE failed\n");
-        }
-        exit(1);
-    }
-    dcache_bsize = dsize;
-    icache_bsize = isize;
-}
-
-#elif defined __APPLE__
-#include <sys/sysctl.h>
-
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-    size_t len;
-    unsigned cacheline;
-    int name[2] = { CTL_HW, HW_CACHELINE };
-
-    len = sizeof(cacheline);
-    if (sysctl(name, 2, &cacheline, &len, NULL, 0)) {
-        perror("sysctl CTL_HW HW_CACHELINE failed");
-        exit(1);
-    }
-    dcache_bsize = cacheline;
-    icache_bsize = cacheline;
-}
-
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-#include <sys/sysctl.h>
-
-static void __attribute__((constructor)) tcg_cache_init(void)
-{
-    size_t len = 4;
-    unsigned cacheline;
-
-    if (sysctlbyname ("machdep.cacheline_size", &cacheline, &len, NULL, 0)) {
-        fprintf(stderr, "sysctlbyname machdep.cacheline_size failed: %s\n",
-                strerror(errno));
-        exit(1);
-    }
-    dcache_bsize = cacheline;
-    icache_bsize = cacheline;
-}
-#endif
diff --git a/util/Makefile.objs b/util/Makefile.objs
index c6205eb..94d9477 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -20,6 +20,7 @@ util-obj-y += host-utils.o
 util-obj-y += bitmap.o bitops.o hbitmap.o
 util-obj-y += fifo8.o
 util-obj-y += acl.o
+util-obj-y += cacheinfo.o
 util-obj-y += error.o qemu-error.o
 util-obj-y += id.o
 util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
new file mode 100644
index 0000000..f987522
--- /dev/null
+++ b/util/cacheinfo.c
@@ -0,0 +1,185 @@
+/*
+ * cacheinfo.c - helpers to query the host about its caches
+ *
+ * Copyright (C) 2017, Emilio G. Cota <cota@braap.org>
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+int qemu_icache_linesize = 0;
+int qemu_dcache_linesize = 0;
+
+/*
+ * Operating system specific detection mechanisms.
+ */
+
+#if defined(_AIX)
+# include <sys/systemcfg.h>
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    *isize = _system_configuration.icache_line;
+    *dsize = _system_configuration.dcache_line;
+}
+
+#elif defined(_WIN32)
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buf;
+    DWORD size = 0;
+    BOOL success;
+    size_t i, n;
+
+    /* Check for the required buffer size first.  Note that if the zero
+       size we use for the probe results in success, then there is no
+       data available; fail in that case.  */
+    success = GetLogicalProcessorInformation(0, &size);
+    if (success || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+        return;
+    }
+
+    n = size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    size = n * sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+    buf = g_new0(SYSTEM_LOGICAL_PROCESSOR_INFORMATION, n);
+    if (!GetLogicalProcessorInformation(buf, &size)) {
+        goto fail;
+    }
+
+    for (i = 0; i < n; i++) {
+        if (buf[i].Relationship == RelationCache
+            && buf[i].Cache.Level == 1) {
+            switch (buf[i].Cache.Type) {
+            case CacheUnified:
+                *isize = *dsize = buf[i].Cache.LineSize;
+                break;
+            case CacheInstruction:
+                *isize = buf[i].Cache.LineSize;
+                break;
+            case CacheData:
+                *dsize = buf[i].Cache.LineSize;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+ fail:
+    g_free(buf);
+}
+
+#elif defined(__APPLE__) \
+      || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
+# include <sys/sysctl.h>
+# if defined(__APPLE__)
+#  define SYSCTL_CACHELINE_NAME "hw.cachelinesize"
+# else
+#  define SYSCTL_CACHELINE_NAME "machdep.cacheline_size"
+# endif
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+    /* There's only a single sysctl for both I/D cache line sizes.  */
+    long size;
+    size_t len = sizeof(size);
+    if (!sysctlbyname(SYSCTL_CACHELINE_NAME, &size, &len, NULL, 0)) {
+        *isize = *dsize = size;
+    }
+}
+
+#else
+/* POSIX */
+
+static void sys_cache_info(int *isize, int *dsize)
+{
+# ifdef _SC_LEVEL1_ICACHE_LINESIZE
+    *isize = sysconf(_SC_LEVEL1_ICACHE_LINESIZE);
+# endif
+# ifdef _SC_LEVEL1_DCACHE_LINESIZE
+    *dsize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+# endif
+}
+#endif /* sys_cache_info */
+
+/*
+ * Architecture (+ OS) specific detection mechanisms.
+ */
+
+#if defined(__aarch64__)
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0 || *dsize == 0) {
+        unsigned ctr;
+
+        /* The real cache geometry is in CCSIDR_EL1/CLIDR_EL1/CSSELR_EL1,
+           but (at least under Linux) these are marked protected by the
+           kernel.  However, CTR_EL0 contains the minimum linesize in the
+           entire hierarchy, and is used by userspace cache flushing.  */
+        asm volatile("mrs\t%0, ctr_el0" : "=r"(ctr));
+        if (*isize == 0) {
+            *isize = 4 << (ctr & 0xf);
+        }
+        if (*dsize == 0) {
+            *dsize = 4 << ((ctr >> 16) & 0xf);
+        }
+    }
+}
+
+#elif defined(_ARCH_PPC) && defined(__linux__)
+
+static void arch_cache_info(int *isize, int *dsize)
+{
+    if (*isize == 0) {
+        *isize = qemu_getauxval(AT_ICACHEBSIZE);
+    }
+    if (*dsize == 0) {
+        *dsize = qemu_getauxval(AT_DCACHEBSIZE);
+    }
+}
+
+#else
+static void arch_cache_info(int *isize, int *dsize) { }
+#endif /* arch_cache_info */
+
+/*
+ * ... and if all else fails ...
+ */
+
+static void fallback_cache_info(int *isize, int *dsize)
+{
+    /* If we can only find one of the two, assume they're the same.  */
+    if (*isize) {
+        if (*dsize) {
+            /* Success! */
+        } else {
+            *dsize = *isize;
+        }
+    } else if (*dsize) {
+        *isize = *dsize;
+    } else {
+#if defined(_ARCH_PPC)
+        /* For PPC, we're going to use the icache size computed for
+           flush_icache_range.  Which means that we must use the
+           architecture minimum.  */
+        *isize = *dsize = 16;
+#else
+        /* Otherwise, 64 bytes is not uncommon.  */
+        *isize = *dsize = 64;
+#endif
+    }
+}
+
+static void __attribute__((constructor)) init_cache_info(void)
+{
+    int isize = 0, dsize = 0;
+
+    sys_cache_info(&isize, &dsize);
+    arch_cache_info(&isize, &dsize);
+    fallback_cache_info(&isize, &dsize);
+
+    qemu_icache_linesize = isize;
+    qemu_dcache_linesize = dsize;
+}
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH v5 2/7] tcg: allocate TB structs before the corresponding translated code
  2017-06-09  5:37 [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 1/7] util: add cacheinfo Richard Henderson
@ 2017-06-09  5:37 ` Richard Henderson
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 3/7] tcg/aarch64: Use ADR in tcg_out_movi Richard Henderson
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2017-06-09  5:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: cota

From: "Emilio G. Cota" <cota@braap.org>

Allocating an arbitrarily-sized array of tbs results in either
(a) a lot of memory wasted or (b) unnecessary flushes of the code
cache when we run out of TB structs in the array.

An obvious solution would be to just malloc a TB struct when needed,
and keep the TB array as an array of pointers (recall that tb_find_pc()
needs the TB array to run in O(log n)).

Perhaps a better solution, which is implemented in this patch, is to
allocate TBs right before the translated code they describe. This
results in some memory waste due to padding to have code and TBs in
separate cache lines--for instance, I measured 4.7% of padding in the
used portion of code_gen_buffer when booting aarch64 Linux on a
host with 64-byte cache lines. However, it can allow for optimizations
in some host architectures, since TCG backends could safely assume that
the TB and the corresponding translated code are very close to each
other in memory. See this message by rth for a detailed explanation:

  https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg05172.html
  Subject: Re: GSoC 2017 Proposal: TCG performance enhancements
  Message-ID: <1e67644b-4b30-887e-d329-1848e94c9484@twiddle.net>

Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <1496790745-314-3-git-send-email-cota@braap.org>
[rth: Simplify the arithmetic in tcg_tb_alloc]
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 include/exec/tb-context.h |  3 ++-
 tcg/tcg.c                 | 20 ++++++++++++++++++++
 tcg/tcg.h                 |  2 +-
 translate-all.c           | 39 ++++++++++++++++++++++++---------------
 4 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/include/exec/tb-context.h b/include/exec/tb-context.h
index c7f17f2..25c2afe 100644
--- a/include/exec/tb-context.h
+++ b/include/exec/tb-context.h
@@ -31,8 +31,9 @@ typedef struct TBContext TBContext;
 
 struct TBContext {
 
-    TranslationBlock *tbs;
+    TranslationBlock **tbs;
     struct qht htable;
+    size_t tbs_size;
     int nb_tbs;
     /* any access to the tbs or the page table must use this lock */
     QemuMutex tb_lock;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 564292f..3559829 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -383,6 +383,26 @@ void tcg_context_init(TCGContext *s)
     }
 }
 
+/*
+ * Allocate TBs right before their corresponding translated code, making
+ * sure that TBs and code are on different cache lines.
+ */
+TranslationBlock *tcg_tb_alloc(TCGContext *s)
+{
+    uintptr_t align = qemu_icache_linesize;
+    TranslationBlock *tb;
+    void *next;
+
+    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
+    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
+
+    if (unlikely(next > s->code_gen_highwater)) {
+        return NULL;
+    }
+    s->code_gen_ptr = next;
+    return tb;
+}
+
 void tcg_prologue_init(TCGContext *s)
 {
     size_t prologue_size, total_size;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 5ec48d1..9e37722 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -697,7 +697,6 @@ struct TCGContext {
        here, because there's too much arithmetic throughout that relies
        on addition and subtraction working on bytes.  Rely on the GCC
        extension that allows arithmetic on void*.  */
-    int code_gen_max_blocks;
     void *code_gen_prologue;
     void *code_gen_epilogue;
     void *code_gen_buffer;
@@ -756,6 +755,7 @@ static inline bool tcg_op_buf_full(void)
 /* tb_lock must be held for tcg_malloc_internal. */
 void *tcg_malloc_internal(TCGContext *s, int size);
 void tcg_pool_reset(TCGContext *s);
+TranslationBlock *tcg_tb_alloc(TCGContext *s);
 
 void tb_lock(void);
 void tb_unlock(void);
diff --git a/translate-all.c b/translate-all.c
index b3ee876..bb094ad 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -781,12 +781,13 @@ static inline void code_gen_alloc(size_t tb_size)
         exit(1);
     }
 
-    /* Estimate a good size for the number of TBs we can support.  We
-       still haven't deducted the prologue from the buffer size here,
-       but that's minimal and won't affect the estimate much.  */
-    tcg_ctx.code_gen_max_blocks
-        = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
-    tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks);
+    /* size this conservatively -- realloc later if needed */
+    tcg_ctx.tb_ctx.tbs_size =
+        tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE / 8;
+    if (unlikely(!tcg_ctx.tb_ctx.tbs_size)) {
+        tcg_ctx.tb_ctx.tbs_size = 64 * 1024;
+    }
+    tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock *, tcg_ctx.tb_ctx.tbs_size);
 
     qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
 }
@@ -828,13 +829,20 @@ bool tcg_enabled(void)
 static TranslationBlock *tb_alloc(target_ulong pc)
 {
     TranslationBlock *tb;
+    TBContext *ctx;
 
     assert_tb_locked();
 
-    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
+    tb = tcg_tb_alloc(&tcg_ctx);
+    if (unlikely(tb == NULL)) {
         return NULL;
     }
-    tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
+    ctx = &tcg_ctx.tb_ctx;
+    if (unlikely(ctx->nb_tbs == ctx->tbs_size)) {
+        ctx->tbs_size *= 2;
+        ctx->tbs = g_renew(TranslationBlock *, ctx->tbs, ctx->tbs_size);
+    }
+    ctx->tbs[ctx->nb_tbs++] = tb;
     tb->pc = pc;
     tb->cflags = 0;
     tb->invalid = false;
@@ -850,8 +858,10 @@ void tb_free(TranslationBlock *tb)
        Ignore the hard cases and just back up if this TB happens to
        be the last one generated.  */
     if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
-            tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
-        tcg_ctx.code_gen_ptr = tb->tc_ptr;
+            tb == tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
+        size_t struct_size = ROUND_UP(sizeof(*tb), qemu_icache_linesize);
+
+        tcg_ctx.code_gen_ptr = tb->tc_ptr - struct_size;
         tcg_ctx.tb_ctx.nb_tbs--;
     }
 }
@@ -1666,7 +1676,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
     m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
     while (m_min <= m_max) {
         m = (m_min + m_max) >> 1;
-        tb = &tcg_ctx.tb_ctx.tbs[m];
+        tb = tcg_ctx.tb_ctx.tbs[m];
         v = (uintptr_t)tb->tc_ptr;
         if (v == tc_ptr) {
             return tb;
@@ -1676,7 +1686,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
             m_min = m + 1;
         }
     }
-    return &tcg_ctx.tb_ctx.tbs[m_max];
+    return tcg_ctx.tb_ctx.tbs[m_max];
 }
 
 #if !defined(CONFIG_USER_ONLY)
@@ -1874,7 +1884,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     direct_jmp_count = 0;
     direct_jmp2_count = 0;
     for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
-        tb = &tcg_ctx.tb_ctx.tbs[i];
+        tb = tcg_ctx.tb_ctx.tbs[i];
         target_code_size += tb->size;
         if (tb->size > max_target_code_size) {
             max_target_code_size = tb->size;
@@ -1894,8 +1904,7 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
     cpu_fprintf(f, "gen code size       %td/%zd\n",
                 tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
                 tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer);
-    cpu_fprintf(f, "TB count            %d/%d\n",
-            tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
+    cpu_fprintf(f, "TB count            %d\n", tcg_ctx.tb_ctx.nb_tbs);
     cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
             tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
                     tcg_ctx.tb_ctx.nb_tbs : 0,
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH v5 3/7] tcg/aarch64: Use ADR in tcg_out_movi
  2017-06-09  5:37 [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 1/7] util: add cacheinfo Richard Henderson
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 2/7] tcg: allocate TB structs before the corresponding translated code Richard Henderson
@ 2017-06-09  5:37 ` Richard Henderson
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 4/7] tcg/arm: Use indirect branch for goto_tb Richard Henderson
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2017-06-09  5:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: cota

The new placement of the TB means that we can use one insn
to load the return value for exit_tb returning the TB pointer.

Tested-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/aarch64/tcg-target.inc.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 5f18545..1fa3bcc 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -616,7 +616,12 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
     /* Look for host pointer values within 4G of the PC.  This happens
        often when loading pointers to QEMU's own data structures.  */
     if (type == TCG_TYPE_I64) {
-        tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
+        tcg_target_long disp = value - (intptr_t)s->code_ptr;
+        if (disp == sextract64(disp, 0, 21)) {
+            tcg_out_insn(s, 3406, ADR, rd, disp);
+            return;
+        }
+        disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
         if (disp == sextract64(disp, 0, 21)) {
             tcg_out_insn(s, 3406, ADRP, rd, disp);
             if (value & 0xfff) {
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH v5 4/7] tcg/arm: Use indirect branch for goto_tb
  2017-06-09  5:37 [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
                   ` (2 preceding siblings ...)
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 3/7] tcg/aarch64: Use ADR in tcg_out_movi Richard Henderson
@ 2017-06-09  5:37 ` Richard Henderson
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 5/7] tcg/arm: Remove limit on code buffer size Richard Henderson
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2017-06-09  5:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: cota

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 include/exec/exec-all.h  |  5 +----
 tcg/arm/tcg-target.inc.c | 17 ++---------------
 2 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 87ae10b..724ec73 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -301,7 +301,7 @@ static inline void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *cpu,
 #define CODE_GEN_AVG_BLOCK_SIZE 150
 #endif
 
-#if defined(__arm__) || defined(_ARCH_PPC) \
+#if defined(_ARCH_PPC) \
     || defined(__x86_64__) || defined(__i386__) \
     || defined(__sparc__) || defined(__aarch64__) \
     || defined(__s390x__) || defined(__mips__) \
@@ -401,9 +401,6 @@ static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
 #elif defined(__aarch64__)
 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
 #define tb_set_jmp_target1 aarch64_tb_set_jmp_target
-#elif defined(__arm__)
-void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr);
-#define tb_set_jmp_target1 arm_tb_set_jmp_target
 #elif defined(__sparc__) || defined(__mips__)
 void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr);
 #else
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 9f5cb66..fce382f 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1026,16 +1026,6 @@ static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
     }
 }
 
-void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
-{
-    tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
-    tcg_insn_unit *target = (tcg_insn_unit *)addr;
-
-    /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
-    reloc_pc24_atomic(code_ptr, target);
-    flush_icache_range(jmp_addr, jmp_addr + 4);
-}
-
 static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
 {
     if (l->has_value) {
@@ -1665,11 +1655,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
     case INDEX_op_goto_tb:
-        if (s->tb_jmp_insn_offset) {
-            /* Direct jump method */
-            s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
-            tcg_out_b_noaddr(s, COND_AL);
-        } else {
+        tcg_debug_assert(s->tb_jmp_insn_offset == 0);
+        {
             /* Indirect jump method */
             intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
             tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH v5 5/7] tcg/arm: Remove limit on code buffer size
  2017-06-09  5:37 [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
                   ` (3 preceding siblings ...)
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 4/7] tcg/arm: Use indirect branch for goto_tb Richard Henderson
@ 2017-06-09  5:37 ` Richard Henderson
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 6/7] tcg/arm: Try pc-relative addresses for movi Richard Henderson
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2017-06-09  5:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: cota

Since we're no longer using a direct branch, we have no
limit on the branch distance.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 translate-all.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/translate-all.c b/translate-all.c
index bb094ad..966747a 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -523,8 +523,6 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 # define MAX_CODE_GEN_BUFFER_SIZE  (32u * 1024 * 1024)
 #elif defined(__aarch64__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
-#elif defined(__arm__)
-# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
 #elif defined(__s390x__)
   /* We have a +- 4GB range on the branches; leave some slop.  */
 # define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH v5 6/7] tcg/arm: Try pc-relative addresses for movi
  2017-06-09  5:37 [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
                   ` (4 preceding siblings ...)
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 5/7] tcg/arm: Remove limit on code buffer size Richard Henderson
@ 2017-06-09  5:37 ` Richard Henderson
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 7/7] tcg/arm: Use ldr (literal) for goto_tb Richard Henderson
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2017-06-09  5:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: cota

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.inc.c | 44 +++++++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 15 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index fce382f..18708b1 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -418,25 +418,39 @@ static inline void tcg_out_dat_imm(TCGContext *s,
 
 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
 {
-    int rot, opc, rn;
-
-    /* For armv7, make sure not to use movw+movt when mov/mvn would do.
-       Speed things up by only checking when movt would be required.
-       Prior to armv7, have one go at fully rotated immediates before
-       doing the decomposition thing below.  */
-    if (!use_armv7_instructions || (arg & 0xffff0000)) {
-        rot = encode_imm(arg);
+    int rot, opc, rn, diff;
+
+    /* Check a single MOV/MVN before anything else.  */
+    rot = encode_imm(arg);
+    if (rot >= 0) {
+        tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
+                        rotl(arg, rot) | (rot << 7));
+        return;
+    }
+    rot = encode_imm(~arg);
+    if (rot >= 0) {
+        tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
+                        rotl(~arg, rot) | (rot << 7));
+        return;
+    }
+
+    /* Check for a pc-relative address.  This will usually be the TB,
+       or within the TB, which is immediately before the code block.  */
+    diff = arg - ((intptr_t)s->code_ptr + 8);
+    if (diff >= 0) {
+        rot = encode_imm(diff);
         if (rot >= 0) {
-            tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
-                            rotl(arg, rot) | (rot << 7));
+            tcg_out_dat_imm(s, cond, ARITH_ADD, rd, TCG_REG_PC,
+                            rotl(diff, rot) | (rot << 7));
             return;
-        }
-        rot = encode_imm(~arg);
+	}
+    } else {
+        rot = encode_imm(-diff);
         if (rot >= 0) {
-            tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
-                            rotl(~arg, rot) | (rot << 7));
+            tcg_out_dat_imm(s, cond, ARITH_SUB, rd, TCG_REG_PC,
+                            rotl(-diff, rot) | (rot << 7));
             return;
-        }
+	}
     }
 
     /* Use movw + movt.  */
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH v5 7/7] tcg/arm: Use ldr (literal) for goto_tb
  2017-06-09  5:37 [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
                   ` (5 preceding siblings ...)
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 6/7] tcg/arm: Try pc-relative addresses for movi Richard Henderson
@ 2017-06-09  5:37 ` Richard Henderson
  2017-06-09  7:00 ` [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate no-reply
  2017-06-09 19:52 ` Emilio G. Cota
  8 siblings, 0 replies; 13+ messages in thread
From: Richard Henderson @ 2017-06-09  5:37 UTC (permalink / raw)
  To: qemu-devel; +Cc: cota

The new placement of the TB means that we can use one insn
to load the goto_tb destination directly from the TB.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/arm/tcg-target.inc.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 18708b1..b640fb9 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1669,14 +1669,27 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         }
         break;
     case INDEX_op_goto_tb:
-        tcg_debug_assert(s->tb_jmp_insn_offset == 0);
         {
             /* Indirect jump method */
-            intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
-            tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
+            intptr_t ptr, dif, dil;
+            TCGReg base = TCG_REG_PC;
+
+            tcg_debug_assert(s->tb_jmp_insn_offset == 0);
+            ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
+            dif = ptr - ((intptr_t)s->code_ptr + 8);
+            dil = sextract32(dif, 0, 12);
+            if (dif != dil) {
+                /* The TB is close, but outside the 12 bits addressable by
+                   the load.  We can extend this to 20 bits with a sub of a
+                   shifted immediate from pc.  In the vastly unlikely event
+                   the code requires more than 1MB, we'll use 2 insns and
+                   be no worse off.  */
+                base = TCG_REG_R0;
+                tcg_out_movi32(s, COND_AL, base, ptr - dil);
+            }
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
+            s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
         }
-        s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
         break;
     case INDEX_op_goto_ptr:
         tcg_out_bx(s, COND_AL, args[0]);
-- 
2.9.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate
  2017-06-09  5:37 [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
                   ` (6 preceding siblings ...)
  2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 7/7] tcg/arm: Use ldr (literal) for goto_tb Richard Henderson
@ 2017-06-09  7:00 ` no-reply
  2017-06-09 19:52 ` Emilio G. Cota
  8 siblings, 0 replies; 13+ messages in thread
From: no-reply @ 2017-06-09  7:00 UTC (permalink / raw)
  To: rth; +Cc: famz, qemu-devel, cota

Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Subject: [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate
Message-id: 20170609053719.26251-1-rth@twiddle.net

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
    echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
    if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
        failed=1
        echo
    fi
    n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag]         patchew/1496991197-1815-1-git-send-email-thuth@redhat.com -> patchew/1496991197-1815-1-git-send-email-thuth@redhat.com
Switched to a new branch 'test'
0d1e2e8 tcg/arm: Use ldr (literal) for goto_tb
7887e52 tcg/arm: Try pc-relative addresses for movi
e014497 tcg/arm: Remove limit on code buffer size
1c0e563 tcg/arm: Use indirect branch for goto_tb
388468e tcg/aarch64: Use ADR in tcg_out_movi
cf1c30b tcg: allocate TB structs before the corresponding translated code
857d34e util: add cacheinfo

=== OUTPUT BEGIN ===
Checking PATCH 1/7: util: add cacheinfo...
ERROR: do not initialise globals to 0 or NULL
#149: FILE: util/cacheinfo.c:11:
+int qemu_icache_linesize = 0;

ERROR: do not initialise globals to 0 or NULL
#150: FILE: util/cacheinfo.c:12:
+int qemu_dcache_linesize = 0;

ERROR: space prohibited after that '&&' (ctx:ExW)
#191: FILE: util/cacheinfo.c:53:
+            && buf[i].Cache.Level == 1) {
             ^

WARNING: architecture specific defines should be avoided
#214: FILE: util/cacheinfo.c:76:
+# if defined(__APPLE__)

WARNING: architecture specific defines should be avoided
#248: FILE: util/cacheinfo.c:110:
+#if defined(__aarch64__)

total: 3 errors, 2 warnings, 218 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 2/7: tcg: allocate TB structs before the corresponding translated code...
Checking PATCH 3/7: tcg/aarch64: Use ADR in tcg_out_movi...
Checking PATCH 4/7: tcg/arm: Use indirect branch for goto_tb...
Checking PATCH 5/7: tcg/arm: Remove limit on code buffer size...
Checking PATCH 6/7: tcg/arm: Try pc-relative addresses for movi...
ERROR: code indent should never use tabs
#54: FILE: tcg/arm/tcg-target.inc.c:446:
+^I}$

ERROR: code indent should never use tabs
#64: FILE: tcg/arm/tcg-target.inc.c:453:
+^I}$

total: 2 errors, 0 warnings, 54 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 7/7: tcg/arm: Use ldr (literal) for goto_tb...
=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-devel@freelists.org

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate
  2017-06-09  5:37 [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
                   ` (7 preceding siblings ...)
  2017-06-09  7:00 ` [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate no-reply
@ 2017-06-09 19:52 ` Emilio G. Cota
  2017-06-09 19:55   ` [Qemu-devel] [PATCH] translate-all: consolidate tb init in tb_gen_code Emilio G. Cota
  2017-06-09 19:58   ` [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
  8 siblings, 2 replies; 13+ messages in thread
From: Emilio G. Cota @ 2017-06-09 19:52 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Thu, Jun 08, 2017 at 22:37:12 -0700, Richard Henderson wrote:
> This is a follow-up to Emilio's patch set.
> 
> My primary changes to Emilio's patches are to the first patch, in
> merging the existing implementations from tcg/ppc/tcg-target.inc.c
> into util/cacheinfo.c.
> 
> Then I've a few follow-up patches to take advantage of the new TB
> placement for arm platforms.  I've had a look at the asm output for
> ppc64 and s390x, and don't see anything obvious that can be improved.
> 
> Changes since v4:
>   * The first patch reorganized a bit for aarch64 and ppc64.
>     Re-tested on win32, for which there was a Werror.
>     Incorporated feedback from Emilio re MacOS.
>   * Fixed the short description for the tcg/arm patches.

This is shaping up quite nicely. Some minor suggestions:

Can we get these checkpatch warnings fixed ..

> === OUTPUT BEGIN ===
> Checking PATCH 1/7: util: add cacheinfo...
> ERROR: do not initialise globals to 0 or NULL
> #149: FILE: util/cacheinfo.c:11:
> +int qemu_icache_linesize = 0;
> 
> ERROR: do not initialise globals to 0 or NULL
> #150: FILE: util/cacheinfo.c:12:
> +int qemu_dcache_linesize = 0;
> 
> ERROR: space prohibited after that '&&' (ctx:ExW)
> #191: FILE: util/cacheinfo.c:53:
> +            && buf[i].Cache.Level == 1) {
>              ^

.. as well as these?

> Checking PATCH 6/7: tcg/arm: Try pc-relative addresses for movi...
> ERROR: code indent should never use tabs
> #54: FILE: tcg/arm/tcg-target.inc.c:446:
> +^I}$
> 
> ERROR: code indent should never use tabs
> #64: FILE: tcg/arm/tcg-target.inc.c:453:
> +^I}$

While at it, we might want to add the tiny patch I'll send as
a reply to this message.

Thanks,

		Emilio

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Qemu-devel] [PATCH] translate-all: consolidate tb init in tb_gen_code
  2017-06-09 19:52 ` Emilio G. Cota
@ 2017-06-09 19:55   ` Emilio G. Cota
  2017-06-09 19:58   ` [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
  1 sibling, 0 replies; 13+ messages in thread
From: Emilio G. Cota @ 2017-06-09 19:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: Richard Henderson

We are partially initializing tb in tb_alloc. Instead, fully
initialize it in tb_gen_code, which is tb_alloc's only caller.

This saves an unnecessary write to tb->cflags.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 translate-all.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/translate-all.c b/translate-all.c
index 966747a..d4f364d 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -841,9 +841,6 @@ static TranslationBlock *tb_alloc(target_ulong pc)
         ctx->tbs = g_renew(TranslationBlock *, ctx->tbs, ctx->tbs_size);
     }
     ctx->tbs[ctx->nb_tbs++] = tb;
-    tb->pc = pc;
-    tb->cflags = 0;
-    tb->invalid = false;
     return tb;
 }
 
@@ -1287,9 +1284,11 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 
     gen_code_buf = tcg_ctx.code_gen_ptr;
     tb->tc_ptr = gen_code_buf;
+    tb->pc = pc;
     tb->cs_base = cs_base;
     tb->flags = flags;
     tb->cflags = cflags;
+    tb->invalid = false;
 
 #ifdef CONFIG_PROFILER
     tcg_ctx.tb_count1++; /* includes aborted translations because of
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate
  2017-06-09 19:52 ` Emilio G. Cota
  2017-06-09 19:55   ` [Qemu-devel] [PATCH] translate-all: consolidate tb init in tb_gen_code Emilio G. Cota
@ 2017-06-09 19:58   ` Richard Henderson
  2017-06-09 20:16     ` Emilio G. Cota
  1 sibling, 1 reply; 13+ messages in thread
From: Richard Henderson @ 2017-06-09 19:58 UTC (permalink / raw)
  To: Emilio G. Cota; +Cc: qemu-devel

On 06/09/2017 12:52 PM, Emilio G. Cota wrote:
> On Thu, Jun 08, 2017 at 22:37:12 -0700, Richard Henderson wrote:
>> This is a follow-up to Emilio's patch set.
>>
>> My primary changes to Emilio's patches are to the first patch, in
>> merging the existing implementations from tcg/ppc/tcg-target.inc.c
>> into util/cacheinfo.c.
>>
>> Then I've a few follow-up patches to take advantage of the new TB
>> placement for arm platforms.  I've had a look at the asm output for
>> ppc64 and s390x, and don't see anything obvious that can be improved.
>>
>> Changes since v4:
>>    * The first patch reorganized a bit for aarch64 and ppc64.
>>      Re-tested on win32, for which there was a Werror.
>>      Incorporated feedback from Emilio re MacOS.
>>    * Fixed the short description for the tcg/arm patches.
> 
> This is shaping up quite nicely. Some minor suggestions:
> 
> Can we get these checkpatch warnings fixed ..
> 
>> === OUTPUT BEGIN ===
>> Checking PATCH 1/7: util: add cacheinfo...
>> ERROR: do not initialise globals to 0 or NULL
>> #149: FILE: util/cacheinfo.c:11:
>> +int qemu_icache_linesize = 0;
>>
>> ERROR: do not initialise globals to 0 or NULL
>> #150: FILE: util/cacheinfo.c:12:
>> +int qemu_dcache_linesize = 0;

These are bogus checkpatch warnings.  If we really want this, we should also 
use -fno-common.  But without that, there is a real difference between 
initialized and non-initialized global variables.

>>
>> ERROR: space prohibited after that '&&' (ctx:ExW)
>> #191: FILE: util/cacheinfo.c:53:
>> +            && buf[i].Cache.Level == 1) {
>>               ^

This is also bogus.  I have no idea what it's attempting to detect.

> 
> .. as well as these?
> 
>> Checking PATCH 6/7: tcg/arm: Try pc-relative addresses for movi...
>> ERROR: code indent should never use tabs
>> #54: FILE: tcg/arm/tcg-target.inc.c:446:
>> +^I}$
>>
>> ERROR: code indent should never use tabs
>> #64: FILE: tcg/arm/tcg-target.inc.c:453:
>> +^I}$

Yes, I can fix these.


r~

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate
  2017-06-09 19:58   ` [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
@ 2017-06-09 20:16     ` Emilio G. Cota
  0 siblings, 0 replies; 13+ messages in thread
From: Emilio G. Cota @ 2017-06-09 20:16 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel

On Fri, Jun 09, 2017 at 12:58:17 -0700, Richard Henderson wrote:
> >>=== OUTPUT BEGIN ===
> >>Checking PATCH 1/7: util: add cacheinfo...
> >>ERROR: do not initialise globals to 0 or NULL
> >>#149: FILE: util/cacheinfo.c:11:
> >>+int qemu_icache_linesize = 0;
> >>
> >>ERROR: do not initialise globals to 0 or NULL
> >>#150: FILE: util/cacheinfo.c:12:
> >>+int qemu_dcache_linesize = 0;
> 
> These are bogus checkpatch warnings.  If we really want this, we should also
> use -fno-common.  But without that, there is a real difference between
> initialized and non-initialized global variables.
> 
> >>
> >>ERROR: space prohibited after that '&&' (ctx:ExW)
> >>#191: FILE: util/cacheinfo.c:53:
> >>+            && buf[i].Cache.Level == 1) {
> >>              ^
> 
> This is also bogus.  I have no idea what it's attempting to detect.

I think this one is just enforcing a certain coding style convention;
"A &&\nB" will pass but "A\n&&B" won't.

		E.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2017-06-09 20:16 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-06-09  5:37 [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 1/7] util: add cacheinfo Richard Henderson
2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 2/7] tcg: allocate TB structs before the corresponding translated code Richard Henderson
2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 3/7] tcg/aarch64: Use ADR in tcg_out_movi Richard Henderson
2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 4/7] tcg/arm: Use indirect branch for goto_tb Richard Henderson
2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 5/7] tcg/arm: Remove limit on code buffer size Richard Henderson
2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 6/7] tcg/arm: Try pc-relative addresses for movi Richard Henderson
2017-06-09  5:37 ` [Qemu-devel] [PATCH v5 7/7] tcg/arm: Use ldr (literal) for goto_tb Richard Henderson
2017-06-09  7:00 ` [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate no-reply
2017-06-09 19:52 ` Emilio G. Cota
2017-06-09 19:55   ` [Qemu-devel] [PATCH] translate-all: consolidate tb init in tb_gen_code Emilio G. Cota
2017-06-09 19:58   ` [Qemu-devel] [PATCH v5 0/7] tcg: allocate TB structs preceding translate Richard Henderson
2017-06-09 20:16     ` Emilio G. Cota

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).