* [Qemu-devel] [PATCH 1/4] exec: Split up and tidy code_gen_buffer
2012-10-12 21:20 ` [Qemu-devel] [PATCH v3 0/4] Better allocation of code_gen_buffer with -fpie Richard Henderson
@ 2012-10-12 21:20 ` Richard Henderson
2012-10-13 13:33 ` Blue Swirl
2012-10-12 21:20 ` [Qemu-devel] [PATCH 2/4] exec: Don't make DEFAULT_CODE_GEN_BUFFER_SIZE too large Richard Henderson
` (3 subsequent siblings)
4 siblings, 1 reply; 12+ messages in thread
From: Richard Henderson @ 2012-10-12 21:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Blue Swirl
It now consists of:
A macro definition of MAX_CODE_GEN_BUFFER_SIZE with host-specific values,
A function size_code_gen_buffer that applies most of the reasoning for
choosing a buffer size,
Three variations of a function alloc_code_gen_buffer that contain all
of the logic for allocating executable memory via a given allocation
mechanism.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
exec.c | 193 ++++++++++++++++++++++++++++++++++-------------------------------
1 file changed, 102 insertions(+), 91 deletions(-)
diff --git a/exec.c b/exec.c
index 7899042..db735dd 100644
--- a/exec.c
+++ b/exec.c
@@ -103,9 +103,9 @@ spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
uint8_t code_gen_prologue[1024] code_gen_section;
static uint8_t *code_gen_buffer;
-static unsigned long code_gen_buffer_size;
+static size_t code_gen_buffer_size;
/* threshold to flush the translated code buffer */
-static unsigned long code_gen_buffer_max_size;
+static size_t code_gen_buffer_max_size;
static uint8_t *code_gen_ptr;
#if !defined(CONFIG_USER_ONLY)
@@ -497,110 +497,121 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
#define mmap_unlock() do { } while(0)
#endif
-#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
-
#if defined(CONFIG_USER_ONLY)
/* Currently it is not recommended to allocate big chunks of data in
- user mode. It will change when a dedicated libc will be used */
+ user mode. It will change when a dedicated libc will be used. */
+/* ??? 64-bit hosts ought to have no problem mmaping data outside the
+ region in which the guest needs to run. Revisit this. */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
- __attribute__((aligned (CODE_GEN_ALIGN)));
+/* ??? Should configure for this, not list operating systems here. */
+#if (defined(__linux__) \
+ || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+ || defined(__DragonFly__) || defined(__OpenBSD__) \
+ || defined(__NetBSD__))
+# define USE_MMAP
#endif
-static void code_gen_alloc(unsigned long tb_size)
+/* Maximum size of the code gen buffer we'd like to use. Unless otherwise
+ indicated, this is constrained by the range of direct branches on the
+ host cpu, as used by the TCG implementation of goto_tb. */
+#if defined(__x86_64__)
+# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
+#elif defined(__sparc__)
+# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
+#elif defined(__arm__)
+# define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
+#elif defined(__s390x__)
+ /* We have a +- 4GB range on the branches; leave some slop. */
+# define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
+#else
+# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
+#endif
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
+
+static inline size_t size_code_gen_buffer(size_t tb_size)
{
+ /* Size the buffer. */
+ if (tb_size == 0) {
#ifdef USE_STATIC_CODE_GEN_BUFFER
- code_gen_buffer = static_code_gen_buffer;
- code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
- map_exec(code_gen_buffer, code_gen_buffer_size);
-#else
- code_gen_buffer_size = tb_size;
- if (code_gen_buffer_size == 0) {
-#if defined(CONFIG_USER_ONLY)
- code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+ tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
- /* XXX: needs adjustments */
- code_gen_buffer_size = (unsigned long)(ram_size / 4);
+ /* ??? Needs adjustments. */
+ /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
+ static buffer, we could size this on RESERVED_VA, on the text
+ segment size of the executable, or continue to use the default. */
+ tb_size = (unsigned long)(ram_size / 4);
#endif
}
- if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
- code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
- /* The code gen buffer location may have constraints depending on
- the host cpu and OS */
-#if defined(__linux__)
- {
- int flags;
- void *start = NULL;
-
- flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
- flags |= MAP_32BIT;
- /* Cannot map more than that */
- if (code_gen_buffer_size > (800 * 1024 * 1024))
- code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
- // Map the buffer below 2G, so we can use direct calls and branches
- start = (void *) 0x40000000UL;
- if (code_gen_buffer_size > (512 * 1024 * 1024))
- code_gen_buffer_size = (512 * 1024 * 1024);
-#elif defined(__arm__)
- /* Keep the buffer no bigger than 16MB to branch between blocks */
- if (code_gen_buffer_size > 16 * 1024 * 1024)
- code_gen_buffer_size = 16 * 1024 * 1024;
-#elif defined(__s390x__)
- /* Map the buffer so that we can use direct calls and branches. */
- /* We have a +- 4GB range on the branches; leave some slop. */
- if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
- code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
- }
- start = (void *)0x90000000UL;
-#endif
- code_gen_buffer = mmap(start, code_gen_buffer_size,
- PROT_WRITE | PROT_READ | PROT_EXEC,
- flags, -1, 0);
- if (code_gen_buffer == MAP_FAILED) {
- fprintf(stderr, "Could not allocate dynamic translator buffer\n");
- exit(1);
- }
+ if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+ tb_size = MIN_CODE_GEN_BUFFER_SIZE;
}
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
- || defined(__DragonFly__) || defined(__OpenBSD__) \
- || defined(__NetBSD__)
- {
- int flags;
- void *addr = NULL;
- flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
- /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
- * 0x40000000 is free */
- flags |= MAP_FIXED;
- addr = (void *)0x40000000;
- /* Cannot map more than that */
- if (code_gen_buffer_size > (800 * 1024 * 1024))
- code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
- // Map the buffer below 2G, so we can use direct calls and branches
- addr = (void *) 0x40000000UL;
- if (code_gen_buffer_size > (512 * 1024 * 1024)) {
- code_gen_buffer_size = (512 * 1024 * 1024);
- }
-#endif
- code_gen_buffer = mmap(addr, code_gen_buffer_size,
- PROT_WRITE | PROT_READ | PROT_EXEC,
- flags, -1, 0);
- if (code_gen_buffer == MAP_FAILED) {
- fprintf(stderr, "Could not allocate dynamic translator buffer\n");
- exit(1);
- }
+ if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+ tb_size = MAX_CODE_GEN_BUFFER_SIZE;
}
+ code_gen_buffer_size = tb_size;
+ return tb_size;
+}
+
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
+ __attribute__((aligned(CODE_GEN_ALIGN)));
+
+static inline void *alloc_code_gen_buffer(void)
+{
+ map_exec(static_code_gen_buffer, code_gen_buffer_size);
+ return static_code_gen_buffer;
+}
+#elif defined(USE_MMAP)
+static inline void *alloc_code_gen_buffer(void)
+{
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+ uintptr_t start = 0;
+ void *buf;
+
+ /* Constrain the position of the buffer based on the host cpu.
+ Note that these addresses are chosen in concert with the
+ addresses assigned in the relevant linker script file. */
+# if defined(__x86_64__) && defined(MAP_32BIT)
+ /* Force the memory down into low memory with the executable.
+ Leave the choice of exact location with the kernel. */
+ flags |= MAP_32BIT;
+ /* Cannot expect to map more than 800MB in low memory. */
+ if (code_gen_buffer_size > 800u * 1024 * 1024) {
+ code_gen_buffer_size = 800u * 1024 * 1024;
+ }
+# elif defined(__sparc__)
+ start = 0x40000000ul;
+# elif defined(__s390x__)
+ start = 0x90000000ul;
+# endif
+
+ buf = mmap((void *)start, code_gen_buffer_size,
+ PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
+ return buf == MAP_FAILED ? NULL : buf;
+}
#else
- code_gen_buffer = g_malloc(code_gen_buffer_size);
- map_exec(code_gen_buffer, code_gen_buffer_size);
-#endif
-#endif /* !USE_STATIC_CODE_GEN_BUFFER */
+static inline void *alloc_code_gen_buffer(void)
+{
+ void *buf = g_malloc(code_gen_buffer_size);
+ if (buf) {
+ map_exec(buf, code_gen_buffer_size);
+ }
+ return buf;
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+
+static inline void code_gen_alloc(size_t tb_size)
+{
+ code_gen_buffer_size = size_code_gen_buffer(tb_size);
+ code_gen_buffer = alloc_code_gen_buffer();
+ if (code_gen_buffer == NULL) {
+ fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+ exit(1);
+ }
+
map_exec(code_gen_prologue, sizeof(code_gen_prologue));
code_gen_buffer_max_size = code_gen_buffer_size -
(TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
--
1.7.11.7
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [Qemu-devel] [PATCH 1/4] exec: Split up and tidy code_gen_buffer
2012-10-12 21:20 ` [Qemu-devel] [PATCH 1/4] exec: Split up and tidy code_gen_buffer Richard Henderson
@ 2012-10-13 13:33 ` Blue Swirl
2012-10-15 20:16 ` Richard Henderson
0 siblings, 1 reply; 12+ messages in thread
From: Blue Swirl @ 2012-10-13 13:33 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Fri, Oct 12, 2012 at 9:20 PM, Richard Henderson <rth@twiddle.net> wrote:
> It now consists of:
>
> A macro definition of MAX_CODE_GEN_BUFFER_SIZE with host-specific values,
>
> A function size_code_gen_buffer that applies most of the reasoning for
> choosing a buffer size,
>
> Three variations of a function alloc_code_gen_buffer that contain all
> of the logic for allocating executable memory via a given allocation
> mechanism.
>
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> exec.c | 193 ++++++++++++++++++++++++++++++++++-------------------------------
> 1 file changed, 102 insertions(+), 91 deletions(-)
>
> diff --git a/exec.c b/exec.c
> index 7899042..db735dd 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -103,9 +103,9 @@ spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
>
> uint8_t code_gen_prologue[1024] code_gen_section;
> static uint8_t *code_gen_buffer;
> -static unsigned long code_gen_buffer_size;
> +static size_t code_gen_buffer_size;
> /* threshold to flush the translated code buffer */
> -static unsigned long code_gen_buffer_max_size;
> +static size_t code_gen_buffer_max_size;
This breaks build on i386:
/src/qemu/exec.c: In function 'dump_exec_info':
/src/qemu/exec.c:4208: error: format '%ld' expects type 'long int',
but argument 4 has type 'size_t'
> static uint8_t *code_gen_ptr;
>
> #if !defined(CONFIG_USER_ONLY)
> @@ -497,110 +497,121 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
> #define mmap_unlock() do { } while(0)
> #endif
>
> -#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
> -
> #if defined(CONFIG_USER_ONLY)
> /* Currently it is not recommended to allocate big chunks of data in
> - user mode. It will change when a dedicated libc will be used */
> + user mode. It will change when a dedicated libc will be used. */
> +/* ??? 64-bit hosts ought to have no problem mmaping data outside the
> + region in which the guest needs to run. Revisit this. */
> #define USE_STATIC_CODE_GEN_BUFFER
> #endif
>
> -#ifdef USE_STATIC_CODE_GEN_BUFFER
> -static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
> - __attribute__((aligned (CODE_GEN_ALIGN)));
> +/* ??? Should configure for this, not list operating systems here. */
> +#if (defined(__linux__) \
> + || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
> + || defined(__DragonFly__) || defined(__OpenBSD__) \
> + || defined(__NetBSD__))
> +# define USE_MMAP
> #endif
>
> -static void code_gen_alloc(unsigned long tb_size)
> +/* Maximum size of the code gen buffer we'd like to use. Unless otherwise
> + indicated, this is constrained by the range of direct branches on the
> + host cpu, as used by the TCG implementation of goto_tb. */
> +#if defined(__x86_64__)
> +# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
> +#elif defined(__sparc__)
> +# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
> +#elif defined(__arm__)
> +# define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
> +#elif defined(__s390x__)
> + /* We have a +- 4GB range on the branches; leave some slop. */
> +# define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
> +#else
> +# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
> +#endif
> +
> +#define DEFAULT_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
> +
> +static inline size_t size_code_gen_buffer(size_t tb_size)
> {
> + /* Size the buffer. */
> + if (tb_size == 0) {
> #ifdef USE_STATIC_CODE_GEN_BUFFER
> - code_gen_buffer = static_code_gen_buffer;
> - code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
> - map_exec(code_gen_buffer, code_gen_buffer_size);
> -#else
> - code_gen_buffer_size = tb_size;
> - if (code_gen_buffer_size == 0) {
> -#if defined(CONFIG_USER_ONLY)
> - code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
> + tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
> #else
> - /* XXX: needs adjustments */
> - code_gen_buffer_size = (unsigned long)(ram_size / 4);
> + /* ??? Needs adjustments. */
> + /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
> + static buffer, we could size this on RESERVED_VA, on the text
> + segment size of the executable, or continue to use the default. */
> + tb_size = (unsigned long)(ram_size / 4);
> #endif
> }
> - if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
> - code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
> - /* The code gen buffer location may have constraints depending on
> - the host cpu and OS */
> -#if defined(__linux__)
> - {
> - int flags;
> - void *start = NULL;
> -
> - flags = MAP_PRIVATE | MAP_ANONYMOUS;
> -#if defined(__x86_64__)
> - flags |= MAP_32BIT;
> - /* Cannot map more than that */
> - if (code_gen_buffer_size > (800 * 1024 * 1024))
> - code_gen_buffer_size = (800 * 1024 * 1024);
> -#elif defined(__sparc__) && HOST_LONG_BITS == 64
> - // Map the buffer below 2G, so we can use direct calls and branches
> - start = (void *) 0x40000000UL;
> - if (code_gen_buffer_size > (512 * 1024 * 1024))
> - code_gen_buffer_size = (512 * 1024 * 1024);
> -#elif defined(__arm__)
> - /* Keep the buffer no bigger than 16MB to branch between blocks */
> - if (code_gen_buffer_size > 16 * 1024 * 1024)
> - code_gen_buffer_size = 16 * 1024 * 1024;
> -#elif defined(__s390x__)
> - /* Map the buffer so that we can use direct calls and branches. */
> - /* We have a +- 4GB range on the branches; leave some slop. */
> - if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
> - code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
> - }
> - start = (void *)0x90000000UL;
> -#endif
> - code_gen_buffer = mmap(start, code_gen_buffer_size,
> - PROT_WRITE | PROT_READ | PROT_EXEC,
> - flags, -1, 0);
> - if (code_gen_buffer == MAP_FAILED) {
> - fprintf(stderr, "Could not allocate dynamic translator buffer\n");
> - exit(1);
> - }
> + if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
> + tb_size = MIN_CODE_GEN_BUFFER_SIZE;
> }
> -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
> - || defined(__DragonFly__) || defined(__OpenBSD__) \
> - || defined(__NetBSD__)
> - {
> - int flags;
> - void *addr = NULL;
> - flags = MAP_PRIVATE | MAP_ANONYMOUS;
> -#if defined(__x86_64__)
> - /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
> - * 0x40000000 is free */
> - flags |= MAP_FIXED;
> - addr = (void *)0x40000000;
> - /* Cannot map more than that */
> - if (code_gen_buffer_size > (800 * 1024 * 1024))
> - code_gen_buffer_size = (800 * 1024 * 1024);
> -#elif defined(__sparc__) && HOST_LONG_BITS == 64
> - // Map the buffer below 2G, so we can use direct calls and branches
> - addr = (void *) 0x40000000UL;
> - if (code_gen_buffer_size > (512 * 1024 * 1024)) {
> - code_gen_buffer_size = (512 * 1024 * 1024);
> - }
> -#endif
> - code_gen_buffer = mmap(addr, code_gen_buffer_size,
> - PROT_WRITE | PROT_READ | PROT_EXEC,
> - flags, -1, 0);
> - if (code_gen_buffer == MAP_FAILED) {
> - fprintf(stderr, "Could not allocate dynamic translator buffer\n");
> - exit(1);
> - }
> + if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
> + tb_size = MAX_CODE_GEN_BUFFER_SIZE;
> }
> + code_gen_buffer_size = tb_size;
> + return tb_size;
> +}
> +
> +#ifdef USE_STATIC_CODE_GEN_BUFFER
> +static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
> + __attribute__((aligned(CODE_GEN_ALIGN)));
> +
> +static inline void *alloc_code_gen_buffer(void)
> +{
> + map_exec(static_code_gen_buffer, code_gen_buffer_size);
> + return static_code_gen_buffer;
> +}
> +#elif defined(USE_MMAP)
> +static inline void *alloc_code_gen_buffer(void)
> +{
> + int flags = MAP_PRIVATE | MAP_ANONYMOUS;
> + uintptr_t start = 0;
> + void *buf;
> +
> + /* Constrain the position of the buffer based on the host cpu.
> + Note that these addresses are chosen in concert with the
> + addresses assigned in the relevant linker script file. */
> +# if defined(__x86_64__) && defined(MAP_32BIT)
> + /* Force the memory down into low memory with the executable.
> + Leave the choice of exact location with the kernel. */
> + flags |= MAP_32BIT;
> + /* Cannot expect to map more than 800MB in low memory. */
> + if (code_gen_buffer_size > 800u * 1024 * 1024) {
> + code_gen_buffer_size = 800u * 1024 * 1024;
> + }
> +# elif defined(__sparc__)
> + start = 0x40000000ul;
> +# elif defined(__s390x__)
> + start = 0x90000000ul;
> +# endif
> +
> + buf = mmap((void *)start, code_gen_buffer_size,
> + PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
> + return buf == MAP_FAILED ? NULL : buf;
> +}
> #else
> - code_gen_buffer = g_malloc(code_gen_buffer_size);
> - map_exec(code_gen_buffer, code_gen_buffer_size);
> -#endif
> -#endif /* !USE_STATIC_CODE_GEN_BUFFER */
> +static inline void *alloc_code_gen_buffer(void)
> +{
> + void *buf = g_malloc(code_gen_buffer_size);
> + if (buf) {
> + map_exec(buf, code_gen_buffer_size);
> + }
> + return buf;
> +}
> +#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
> +
> +static inline void code_gen_alloc(size_t tb_size)
> +{
> + code_gen_buffer_size = size_code_gen_buffer(tb_size);
> + code_gen_buffer = alloc_code_gen_buffer();
> + if (code_gen_buffer == NULL) {
> + fprintf(stderr, "Could not allocate dynamic translator buffer\n");
> + exit(1);
> + }
> +
> map_exec(code_gen_prologue, sizeof(code_gen_prologue));
> code_gen_buffer_max_size = code_gen_buffer_size -
> (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
> --
> 1.7.11.7
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH 2/4] exec: Don't make DEFAULT_CODE_GEN_BUFFER_SIZE too large
2012-10-12 21:20 ` [Qemu-devel] [PATCH v3 0/4] Better allocation of code_gen_buffer with -fpie Richard Henderson
2012-10-12 21:20 ` [Qemu-devel] [PATCH 1/4] exec: Split up and tidy code_gen_buffer Richard Henderson
@ 2012-10-12 21:20 ` Richard Henderson
2012-10-12 21:20 ` [Qemu-devel] [PATCH 3/4] exec: Do not use absolute address hints for code_gen_buffer with -fpie Richard Henderson
` (2 subsequent siblings)
4 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2012-10-12 21:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Blue Swirl
For ARM we cap the buffer size to 16MB. Do not allocate 32MB in that case.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
exec.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/exec.c b/exec.c
index db735dd..386cc08 100644
--- a/exec.c
+++ b/exec.c
@@ -529,7 +529,11 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
#endif
-#define DEFAULT_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE \
+ (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
+ ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
static inline size_t size_code_gen_buffer(size_t tb_size)
{
--
1.7.11.7
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH 3/4] exec: Do not use absolute address hints for code_gen_buffer with -fpie
2012-10-12 21:20 ` [Qemu-devel] [PATCH v3 0/4] Better allocation of code_gen_buffer with -fpie Richard Henderson
2012-10-12 21:20 ` [Qemu-devel] [PATCH 1/4] exec: Split up and tidy code_gen_buffer Richard Henderson
2012-10-12 21:20 ` [Qemu-devel] [PATCH 2/4] exec: Don't make DEFAULT_CODE_GEN_BUFFER_SIZE too large Richard Henderson
@ 2012-10-12 21:20 ` Richard Henderson
2012-10-12 21:20 ` [Qemu-devel] [PATCH 4/4] exec: Allocate code_gen_prologue from code_gen_buffer Richard Henderson
2012-10-12 21:41 ` [Qemu-devel] [PATCH 5/4] exec: Make MIN_CODE_GEN_BUFFER_SIZE private to exec.c Richard Henderson
4 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2012-10-12 21:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Blue Swirl
The hard-coded addresses inside alloc_code_gen_buffer only make sense
if we're building an executable that will actually run at the address
we've put into the linker scripts.
When we're building with -fpie, the executable will run at some
random location chosen by the kernel. We get better placement for
the code_gen_buffer if we allow the kernel to place the memory,
as it will tend to to place it near the executable, based on the
PROT_EXEC bit.
Since code_gen_prologue is always inside the executable, this effect
is easily seen at the end of most TB, with the exit_tb opcode, and
with any calls to helper functions.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
exec.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/exec.c b/exec.c
index 386cc08..e5f8c30 100644
--- a/exec.c
+++ b/exec.c
@@ -578,7 +578,12 @@ static inline void *alloc_code_gen_buffer(void)
/* Constrain the position of the buffer based on the host cpu.
Note that these addresses are chosen in concert with the
addresses assigned in the relevant linker script file. */
-# if defined(__x86_64__) && defined(MAP_32BIT)
+# if defined(__PIE__) || defined(__PIC__)
+ /* Don't bother setting a preferred location if we're building
+ a position-independent executable. We're more likely to get
+ an address near the main executable if we let the kernel
+ choose the address. */
+# elif defined(__x86_64__) && defined(MAP_32BIT)
/* Force the memory down into low memory with the executable.
Leave the choice of exact location with the kernel. */
flags |= MAP_32BIT;
--
1.7.11.7
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH 4/4] exec: Allocate code_gen_prologue from code_gen_buffer
2012-10-12 21:20 ` [Qemu-devel] [PATCH v3 0/4] Better allocation of code_gen_buffer with -fpie Richard Henderson
` (2 preceding siblings ...)
2012-10-12 21:20 ` [Qemu-devel] [PATCH 3/4] exec: Do not use absolute address hints for code_gen_buffer with -fpie Richard Henderson
@ 2012-10-12 21:20 ` Richard Henderson
2012-10-12 21:41 ` [Qemu-devel] [PATCH 5/4] exec: Make MIN_CODE_GEN_BUFFER_SIZE private to exec.c Richard Henderson
4 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2012-10-12 21:20 UTC (permalink / raw)
To: qemu-devel; +Cc: Blue Swirl
We had a hack for arm and sparc, allocating code_gen_prologue to a
special section, which honestly does no good in certain cases.
We've already got limits on code_gen_buffer_size to ensure that all
TBs can use direct branches between themselves; reuse this limit to
ensure the prologue is also reachable.
As a bonus, we get to avoid marking a page of the main executable's
data segment as executable.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
exec.c | 30 +++++++++++-------------------
tcg/tcg.h | 2 +-
2 files changed, 12 insertions(+), 20 deletions(-)
diff --git a/exec.c b/exec.c
index e5f8c30..958aebe 100644
--- a/exec.c
+++ b/exec.c
@@ -86,22 +86,7 @@ static int nb_tbs;
/* any access to the tbs or the page table must use this lock */
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
-#if defined(__arm__) || defined(__sparc__)
-/* The prologue must be reachable with a direct jump. ARM and Sparc64
- have limited branch ranges (possibly also PPC) so place it in a
- section close to code segment. */
-#define code_gen_section \
- __attribute__((__section__(".gen_code"))) \
- __attribute__((aligned (32)))
-#elif defined(_WIN32) && !defined(_WIN64)
-#define code_gen_section \
- __attribute__((aligned (16)))
-#else
-#define code_gen_section \
- __attribute__((aligned (32)))
-#endif
-
-uint8_t code_gen_prologue[1024] code_gen_section;
+uint8_t *code_gen_prologue;
static uint8_t *code_gen_buffer;
static size_t code_gen_buffer_size;
/* threshold to flush the translated code buffer */
@@ -221,7 +206,7 @@ static int tb_flush_count;
static int tb_phys_invalidate_count;
#ifdef _WIN32
-static void map_exec(void *addr, long size)
+static inline void map_exec(void *addr, long size)
{
DWORD old_protect;
VirtualProtect(addr, size,
@@ -229,7 +214,7 @@ static void map_exec(void *addr, long size)
}
#else
-static void map_exec(void *addr, long size)
+static inline void map_exec(void *addr, long size)
{
unsigned long start, end, page_size;
@@ -621,7 +606,14 @@ static inline void code_gen_alloc(size_t tb_size)
exit(1);
}
- map_exec(code_gen_prologue, sizeof(code_gen_prologue));
+ /* Steal room for the prologue at the end of the buffer. This ensures
+ (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
+ from TB's to the prologue are going to be in range. It also means
+ that we don't need to mark (additional) portions of the data segment
+ as executable. */
+ code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
+ code_gen_buffer_size -= 1024;
+
code_gen_buffer_max_size = code_gen_buffer_size -
(TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 7bafe0e..45e94f5 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -616,7 +616,7 @@ TCGv_i64 tcg_const_i64(int64_t val);
TCGv_i32 tcg_const_local_i32(int32_t val);
TCGv_i64 tcg_const_local_i64(int64_t val);
-extern uint8_t code_gen_prologue[];
+extern uint8_t *code_gen_prologue;
/* TCG targets may use a different definition of tcg_qemu_tb_exec. */
#if !defined(tcg_qemu_tb_exec)
--
1.7.11.7
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [Qemu-devel] [PATCH 5/4] exec: Make MIN_CODE_GEN_BUFFER_SIZE private to exec.c
2012-10-12 21:20 ` [Qemu-devel] [PATCH v3 0/4] Better allocation of code_gen_buffer with -fpie Richard Henderson
` (3 preceding siblings ...)
2012-10-12 21:20 ` [Qemu-devel] [PATCH 4/4] exec: Allocate code_gen_prologue from code_gen_buffer Richard Henderson
@ 2012-10-12 21:41 ` Richard Henderson
4 siblings, 0 replies; 12+ messages in thread
From: Richard Henderson @ 2012-10-12 21:41 UTC (permalink / raw)
To: qemu-devel; +Cc: Blue Swirl
It is used nowhere else, and the corresponding MAX_CODE_GEN_BUFFER_SIZE
also lives there.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
exec-all.h | 2 --
exec.c | 4 ++++
2 files changed, 4 insertions(+), 2 deletions(-)
Dang it, I knew there was something else I was intending to include
in the patch set that I sent a moment ago...
r~
diff --git a/exec-all.h b/exec-all.h
index 6516da0..7f29820 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -121,8 +121,6 @@ static inline void tlb_flush(CPUArchState *env, int flush_global)
#define CODE_GEN_PHYS_HASH_BITS 15
#define CODE_GEN_PHYS_HASH_SIZE (1 << CODE_GEN_PHYS_HASH_BITS)
-#define MIN_CODE_GEN_BUFFER_SIZE (1024 * 1024)
-
/* estimated block size for TB allocation */
/* XXX: use a per code average code fragment size and modulate it
according to the host CPU */
diff --git a/exec.c b/exec.c
index 958aebe..096cc56 100644
--- a/exec.c
+++ b/exec.c
@@ -498,6 +498,10 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
# define USE_MMAP
#endif
+/* Minimum size of the code gen buffer. This number is randomly chosen,
+ but not so small that we can't have a fair number of TB's live. */
+#define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
+
/* Maximum size of the code gen buffer we'd like to use. Unless otherwise
indicated, this is constrained by the range of direct branches on the
host cpu, as used by the TCG implementation of goto_tb. */
--
1.7.11.7
^ permalink raw reply related [flat|nested] 12+ messages in thread