* [Qemu-devel] [PATCH] tcg: add ability to dump /tmp/perf-<pid>.map files
@ 2014-07-11 16:43 Alex Bennée
2014-07-11 16:52 ` Richard Henderson
0 siblings, 1 reply; 4+ messages in thread
From: Alex Bennée @ 2014-07-11 16:43 UTC (permalink / raw)
To: qemu-devel; +Cc: batuzovk, Alex Bennée, Anthony Liguori, rth
This allows the perf tool to map samples to each individual translation
block. This could be expanded for user space but currently it gives
enough information to find any hotblocks by other means.
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
v2:
- hoist up into translate-all.c
- don't use pointless glib wrappers
- use proper format types for portability
- mark prologue/epilog area
- rebase
---
qemu-options.hx | 10 ++++++++++
translate-all.c | 27 +++++++++++++++++++++++++++
vl.c | 6 ++++++
3 files changed, 43 insertions(+)
diff --git a/qemu-options.hx b/qemu-options.hx
index c2c0823..b8c81e3 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3220,6 +3220,16 @@ STEXI
prepend a timestamp to each log message.(default:on)
ETEXI
+DEF("perfmap", 0, QEMU_OPTION_PERFMAP, \
+ "-perfmap generate a /tmp/perf-${pid}.map file for perf\n",
+ QEMU_ARCH_ALL)
+STEXI
+@item -perfmap
+@findex -perfmap
+This will cause QEMU to generate a map file for Linux perf tools that will allow
+basic profiling information to be broken down into basic blocks.
+ETEXI
+
HXCOMM This is the last statement. Insert new options before this line!
STEXI
@end table
diff --git a/translate-all.c b/translate-all.c
index 5549a85..0c7cbbe 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -27,6 +27,7 @@
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
+#include <glib.h>
#include "config.h"
@@ -129,6 +130,25 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2);
static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
+static FILE *tb_perfmap = NULL;
+/* if I could put this in a header easily.... */
+void tb_enable_perfmap(void);
+
+void tb_enable_perfmap(void) {
+ gchar * map_file = g_strdup_printf("/tmp/perf-%d.map", getpid());
+ tb_perfmap = fopen(map_file, "w");
+ g_free(map_file);
+}
+
+static void tb_write_perfmap(tcg_insn_unit *start, int size, target_ulong pc)
+{
+ if (tb_perfmap) {
+ fprintf(tb_perfmap,
+ "%"PRIxPTR" %x subject-"TARGET_FMT_lx"\n",
+ (uintptr_t) start, size, pc);
+ }
+}
+
void cpu_gen_init(void)
{
tcg_context_init(&tcg_ctx);
@@ -184,6 +204,7 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
s->code_out_len += gen_code_size;
#endif
+ tb_write_perfmap(gen_code_buf, gen_code_size, tb->pc);
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
qemu_log("OUT: [size=%d]\n", gen_code_size);
@@ -584,6 +605,12 @@ static inline void code_gen_alloc(size_t tb_size)
tcg_ctx.code_gen_buffer_size - 1024;
tcg_ctx.code_gen_buffer_size -= 1024;
+ if (tb_perfmap) {
+ fprintf(tb_perfmap,
+ "%"PRIxPTR" %x tcg-prologue-buffer\n",
+ (uintptr_t) tcg_ctx.code_gen_prologue, 1024);
+ }
+
tcg_ctx.code_gen_buffer_max_size = tcg_ctx.code_gen_buffer_size -
(TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
diff --git a/vl.c b/vl.c
index d8c1572..7650ec8 100644
--- a/vl.c
+++ b/vl.c
@@ -118,6 +118,9 @@ int main(int argc, char **argv)
#include "qapi/string-input-visitor.h"
#include "qom/object_interfaces.h"
+/* Not sure where the best place for this is? */
+extern void tb_enable_perfmap(void);
+
#define DEFAULT_RAM_SIZE 128
#define MAX_VIRTIO_CONSOLES 1
@@ -3387,6 +3390,9 @@ int main(int argc, char **argv, char **envp)
case QEMU_OPTION_D:
log_file = optarg;
break;
+ case QEMU_OPTION_PERFMAP:
+ tb_enable_perfmap();
+ break;
case QEMU_OPTION_s:
add_device_config(DEV_GDB, "tcp::" DEFAULT_GDBSTUB_PORT);
break;
--
2.0.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [Qemu-devel] [PATCH] tcg: add ability to dump /tmp/perf-<pid>.map files
2014-07-11 16:43 Alex Bennée
@ 2014-07-11 16:52 ` Richard Henderson
2014-07-14 13:53 ` Alex Bennée
0 siblings, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2014-07-11 16:52 UTC (permalink / raw)
To: Alex Bennée, qemu-devel; +Cc: Anthony Liguori, batuzovk
On 07/11/2014 09:43 AM, Alex Bennée wrote:
> +/* if I could put this in a header easily.... */
> +void tb_enable_perfmap(void);
How about next to tb_flush in exec/exec-all.h?
> +
> +void tb_enable_perfmap(void) {
Watch the { placement.
> + gchar * map_file = g_strdup_printf("/tmp/perf-%d.map", getpid());
> + tb_perfmap = fopen(map_file, "w");
> + g_free(map_file);
> +}
And speaking of tb_flush, ought there be some marker in the perf file to reset
things? Otherwise you're just going to wind up with garbage if you let the
guest run long enough.
Of course, I have no idea how to link the flush with a timestamp that allows
one to selectively choose which section of the perf file one uses...
r~
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Qemu-devel] [PATCH] tcg: add ability to dump /tmp/perf-<pid>.map files
2014-07-11 16:52 ` Richard Henderson
@ 2014-07-14 13:53 ` Alex Bennée
0 siblings, 0 replies; 4+ messages in thread
From: Alex Bennée @ 2014-07-14 13:53 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel, Anthony Liguori, batuzovk
Richard Henderson writes:
> On 07/11/2014 09:43 AM, Alex Bennée wrote:
>> +/* if I could put this in a header easily.... */
>> +void tb_enable_perfmap(void);
>
> How about next to tb_flush in exec/exec-all.h?
Including exec/exec-all.h trips up some sort of include prevention
magic:
CC vl.o
In file included from vl.c:82:0:
/home/alex/lsrc/qemu/qemu.git/include/exec/exec-all.h:75:28: error: attempt to use poisoned "CPUArchState"
void gen_intermediate_code(CPUArchState *env, struct TranslationBlock *tb);
^
>> +
>> +void tb_enable_perfmap(void) {
>
> Watch the { placement.
>
>> + gchar * map_file = g_strdup_printf("/tmp/perf-%d.map", getpid());
>> + tb_perfmap = fopen(map_file, "w");
>> + g_free(map_file);
>> +}
>
> And speaking of tb_flush, ought there be some marker in the perf file to reset
> things? Otherwise you're just going to wind up with garbage if you let the
> guest run long enough.
>
> Of course, I have no idea how to link the flush with a timestamp that allows
> one to selectively choose which section of the perf file one uses...
The perf JIT format isn't really that smart so currently I'm not sure
how it would deal with it. Currently I suspect the best solution is
"don't do that then" :-/
>
>
> r~
--
Alex Bennée
^ permalink raw reply [flat|nested] 4+ messages in thread
* [Qemu-devel] [PATCH] tcg: add ability to dump /tmp/perf-<pid>.map files
@ 2014-07-15 11:34 Alex Bennée
0 siblings, 0 replies; 4+ messages in thread
From: Alex Bennée @ 2014-07-15 11:34 UTC (permalink / raw)
To: qemu-devel; +Cc: batuzovk, Alex Bennée, viro, Anthony Liguori, rth
This allows the perf tool to map samples to each individual translation
block. This could be expanded for user space but currently it gives
enough information to find any hotblocks by other means.
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
v2:
- hoist up into translate-all.c
- don't use pointless glib wrappers
- use proper format types for portability
- mark prologue/epilog area
- rebase
v3:
- fix bracket for perf-map
- find an include for the tb_enable_perfmap() declaration
---
include/qemu-common.h | 2 ++
qemu-options.hx | 10 ++++++++++
translate-all.c | 26 ++++++++++++++++++++++++++
vl.c | 4 ++++
4 files changed, 42 insertions(+)
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 66ceceb..ffacd5a 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -259,6 +259,8 @@ typedef struct PCIHostDeviceAddress {
void tcg_exec_init(unsigned long tb_size);
bool tcg_enabled(void);
+void tb_enable_perfmap(void);
+
void cpu_exec_init_all(void);
/* CPU save/load. */
diff --git a/qemu-options.hx b/qemu-options.hx
index c2c0823..b8c81e3 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3220,6 +3220,16 @@ STEXI
prepend a timestamp to each log message.(default:on)
ETEXI
+DEF("perfmap", 0, QEMU_OPTION_PERFMAP, \
+ "-perfmap generate a /tmp/perf-${pid}.map file for perf\n",
+ QEMU_ARCH_ALL)
+STEXI
+@item -perfmap
+@findex -perfmap
+This will cause QEMU to generate a map file for Linux perf tools that will allow
+basic profiling information to be broken down into basic blocks.
+ETEXI
+
HXCOMM This is the last statement. Insert new options before this line!
STEXI
@end table
diff --git a/translate-all.c b/translate-all.c
index 5549a85..11d3f28 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -27,6 +27,7 @@
#include <stdio.h>
#include <string.h>
#include <inttypes.h>
+#include <glib.h>
#include "config.h"
@@ -129,6 +130,24 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2);
static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
+static FILE *tb_perfmap = NULL;
+
+void tb_enable_perfmap(void)
+{
+ gchar * map_file = g_strdup_printf("/tmp/perf-%d.map", getpid());
+ tb_perfmap = fopen(map_file, "w");
+ g_free(map_file);
+}
+
+static void tb_write_perfmap(tcg_insn_unit *start, int size, target_ulong pc)
+{
+ if (tb_perfmap) {
+ fprintf(tb_perfmap,
+ "%"PRIxPTR" %x subject-"TARGET_FMT_lx"\n",
+ (uintptr_t) start, size, pc);
+ }
+}
+
void cpu_gen_init(void)
{
tcg_context_init(&tcg_ctx);
@@ -184,6 +203,7 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
s->code_out_len += gen_code_size;
#endif
+ tb_write_perfmap(gen_code_buf, gen_code_size, tb->pc);
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
qemu_log("OUT: [size=%d]\n", gen_code_size);
@@ -584,6 +604,12 @@ static inline void code_gen_alloc(size_t tb_size)
tcg_ctx.code_gen_buffer_size - 1024;
tcg_ctx.code_gen_buffer_size -= 1024;
+ if (tb_perfmap) {
+ fprintf(tb_perfmap,
+ "%"PRIxPTR" %x tcg-prologue-buffer\n",
+ (uintptr_t) tcg_ctx.code_gen_prologue, 1024);
+ }
+
tcg_ctx.code_gen_buffer_max_size = tcg_ctx.code_gen_buffer_size -
(TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
diff --git a/vl.c b/vl.c
index d8c1572..bc840d1 100644
--- a/vl.c
+++ b/vl.c
@@ -117,6 +117,7 @@ int main(int argc, char **argv)
#include "ui/qemu-spice.h"
#include "qapi/string-input-visitor.h"
#include "qom/object_interfaces.h"
+#include "qemu-common.h"
#define DEFAULT_RAM_SIZE 128
@@ -3387,6 +3388,9 @@ int main(int argc, char **argv, char **envp)
case QEMU_OPTION_D:
log_file = optarg;
break;
+ case QEMU_OPTION_PERFMAP:
+ tb_enable_perfmap();
+ break;
case QEMU_OPTION_s:
add_device_config(DEV_GDB, "tcp::" DEFAULT_GDBSTUB_PORT);
break;
--
2.0.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2014-07-15 11:34 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-07-15 11:34 [Qemu-devel] [PATCH] tcg: add ability to dump /tmp/perf-<pid>.map files Alex Bennée
-- strict thread matches above, loose matches on Subject: below --
2014-07-11 16:43 Alex Bennée
2014-07-11 16:52 ` Richard Henderson
2014-07-14 13:53 ` Alex Bennée
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).