All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH] exec: Don't request an address for code_gen_buffer if -fpie
@ 2012-10-04 21:31 Richard Henderson
  2012-10-07 16:34 ` Blue Swirl
  0 siblings, 1 reply; 12+ messages in thread
From: Richard Henderson @ 2012-10-04 21:31 UTC (permalink / raw)
  To: qemu-devel

The hard-coded addresses inside code_gen_alloc only make sense if
we're building an executable that will actually run at the address
we've put into the linker scripts.

When we're building with -fpie, the executable will run at some
random location chosen by the kernel.  We get better placement for
the code_gen_buffer if we allow the kernel to place the memory,
as it will tend to to place it near the executable, based on the
PROT_EXEC bit.

Since code_gen_prologue is always inside the executable, this effect
is easily seen at the end of most TB, with the exit_tb opcode:

Before:
0x40b82024:  mov    $0x7fa97bd5c296,%r10
0x40b8202e:  jmpq   *%r10

After:
0x7f1191ff1024:  jmpq   0x7f119edc0296

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 exec.c | 127 +++++++++++++++++++++++++++++++----------------------------------
 1 file changed, 60 insertions(+), 67 deletions(-)

diff --git a/exec.c b/exec.c
index bb6aa4a..0ddc07a 100644
--- a/exec.c
+++ b/exec.c
@@ -510,6 +510,14 @@ static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
                __attribute__((aligned (CODE_GEN_ALIGN)));
 #endif
 
+/* ??? Should configure for this not list operating systems here.  */
+#if defined(__linux__) \
+    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__)
+# define USE_MMAP
+#endif
+
 static void code_gen_alloc(unsigned long tb_size)
 {
 #ifdef USE_STATIC_CODE_GEN_BUFFER
@@ -517,6 +525,45 @@ static void code_gen_alloc(unsigned long tb_size)
     code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
     map_exec(code_gen_buffer, code_gen_buffer_size);
 #else
+#ifdef USE_MMAP
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#endif
+    uintptr_t max_buf = -1, start = 0;
+
+    /* Constrain the size and position of the buffer based on the host cpu.  */
+#if defined(__x86_64__)
+# if !defined(__PIE__) && !defined(__PIC__) && defined(MAP_32BIT)
+    /* Force the memory down into low memory with the executable.
+       Leave the choice of exact location with the kernel.  */
+    flags |= MAP_32BIT;
+    /* Cannot expect to map more than 800MB in low memory.  */
+    max_buf = 800 * 1024 * 1024;
+# else
+    /* Maximum range of direct branches.  */
+    max_buf = 2ul * 1024 * 1024 * 1024;
+# endif
+#elif defined(__sparc__) && HOST_LONG_BITS == 64
+    /* Maximum range of direct branches between TB (via "call").  */
+    max_buf = 2ul * 1024 * 1024 * 1024;
+    start = 0x40000000ul;
+#elif defined(__arm__)
+    /* Keep the buffer no bigger than 16MB to branch between blocks */
+    max_buf = 16 * 1024 * 1024;
+#elif defined(__s390x__)
+    /* Map the buffer so that we can use direct calls and branches.  */
+    /* We have a +- 4GB range on the branches; leave some slop.  */
+    max_buf = 3ul * 1024 * 1024 * 1024;
+    start = 0x90000000ul;
+#endif
+#if defined(__PIE__) || defined(__PIC__)
+    /* Don't bother setting a preferred location if we're building
+       a position-independent executable.  We're more likely to get
+       an address near the main executable if we let the kernel
+       choose the address.  */
+    start = 0;
+#endif
+
+    /* Size the buffer.  */
     code_gen_buffer_size = tb_size;
     if (code_gen_buffer_size == 0) {
 #if defined(CONFIG_USER_ONLY)
@@ -526,81 +573,27 @@ static void code_gen_alloc(unsigned long tb_size)
         code_gen_buffer_size = (unsigned long)(ram_size / 4);
 #endif
     }
-    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
+    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE) {
         code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
-    /* The code gen buffer location may have constraints depending on
-       the host cpu and OS */
-#if defined(__linux__) 
-    {
-        int flags;
-        void *start = NULL;
-
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        flags |= MAP_32BIT;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        start = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024))
-            code_gen_buffer_size = (512 * 1024 * 1024);
-#elif defined(__arm__)
-        /* Keep the buffer no bigger than 16MB to branch between blocks */
-        if (code_gen_buffer_size > 16 * 1024 * 1024)
-            code_gen_buffer_size = 16 * 1024 * 1024;
-#elif defined(__s390x__)
-        /* Map the buffer so that we can use direct calls and branches.  */
-        /* We have a +- 4GB range on the branches; leave some slop.  */
-        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
-            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
-        }
-        start = (void *)0x90000000UL;
-#endif
-        code_gen_buffer = mmap(start, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC,
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
     }
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__)
-    {
-        int flags;
-        void *addr = NULL;
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
-         * 0x40000000 is free */
-        flags |= MAP_FIXED;
-        addr = (void *)0x40000000;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        addr = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
-            code_gen_buffer_size = (512 * 1024 * 1024);
-        }
-#endif
-        code_gen_buffer = mmap(addr, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC, 
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
+    if (code_gen_buffer_size > max_buf) {
+        code_gen_buffer_size = max_buf;
+    }
+
+#ifdef USE_MMAP
+    code_gen_buffer = mmap((void *)start, code_gen_buffer_size,
+                           PROT_WRITE | PROT_READ | PROT_EXEC,
+                           flags, -1, 0);
+    if (code_gen_buffer == MAP_FAILED) {
+        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+        exit(1);
     }
 #else
     code_gen_buffer = g_malloc(code_gen_buffer_size);
     map_exec(code_gen_buffer, code_gen_buffer_size);
 #endif
 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
+
     map_exec(code_gen_prologue, sizeof(code_gen_prologue));
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
-- 
1.7.11.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2012-10-15 20:17 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-10-04 21:31 [Qemu-devel] [PATCH] exec: Don't request an address for code_gen_buffer if -fpie Richard Henderson
2012-10-07 16:34 ` Blue Swirl
2012-10-07 19:20   ` Richard Henderson
2012-10-07 19:40     ` Blue Swirl
2012-10-12 21:20       ` [Qemu-devel] [PATCH v3 0/4] Better allocation of code_gen_buffer with -fpie Richard Henderson
2012-10-12 21:20         ` [Qemu-devel] [PATCH 1/4] exec: Split up and tidy code_gen_buffer Richard Henderson
2012-10-13 13:33           ` Blue Swirl
2012-10-15 20:16             ` Richard Henderson
2012-10-12 21:20         ` [Qemu-devel] [PATCH 2/4] exec: Don't make DEFAULT_CODE_GEN_BUFFER_SIZE too large Richard Henderson
2012-10-12 21:20         ` [Qemu-devel] [PATCH 3/4] exec: Do not use absolute address hints for code_gen_buffer with -fpie Richard Henderson
2012-10-12 21:20         ` [Qemu-devel] [PATCH 4/4] exec: Allocate code_gen_prologue from code_gen_buffer Richard Henderson
2012-10-12 21:41         ` [Qemu-devel] [PATCH 5/4] exec: Make MIN_CODE_GEN_BUFFER_SIZE private to exec.c Richard Henderson

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.