From: David Ahern <dsahern@gmail.com>
To: acme@ghostprotocols.net, linux-kernel@vger.kernel.org
Cc: David Ahern <dsahern@gmail.com>, Ingo Molnar <mingo@kernel.org>,
Frederic Weisbecker <fweisbec@gmail.com>,
Peter Zijlstra <peterz@infradead.org>,
Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
Mike Galbraith <efault@gmx.de>,
Stephane Eranian <eranian@google.com>
Subject: [PATCH 3/3] perf record: mmap output file
Date: Tue, 8 Oct 2013 21:26:54 -0600 [thread overview]
Message-ID: <1381289214-24885-4-git-send-email-dsahern@gmail.com> (raw)
In-Reply-To: <1381289214-24885-1-git-send-email-dsahern@gmail.com>
When recording raw_syscalls for the entire system, e.g.,
perf record -e raw_syscalls:*,sched:sched_switch -a -- sleep 1
you end up with a self-reinforcing feedback loop, as perf itself calls
write() fairly often and each call generates more events to record. This patch handles the problem by mmap'ing the
file in chunks of 64M at a time and copying events from the event buffers
to the file, avoiding write system calls.
Before (with write syscall):
perf record -o /tmp/perf.data -e raw_syscalls:*,sched:sched_switch -a -- sleep 1
[ perf record: Woken up 0 times to write data ]
[ perf record: Captured and wrote 81.843 MB /tmp/perf.data (~3575786 samples) ]
After (using mmap):
perf record -o /tmp/perf.data -e raw_syscalls:*,sched:sched_switch -a -- sleep 1
[ perf record: Woken up 31 times to write data ]
[ perf record: Captured and wrote 8.203 MB /tmp/perf.data (~358388 samples) ]
In addition to the perf-trace benefits, using mmap lowers the overhead of
perf-record. For example,
perf stat -i -- perf record -g -o /tmp/perf.data openssl speed aes
shows that time, CPU cycles, and instructions all drop by more than a
factor of 3. Jiri also ran a test that showed a big improvement.
Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Stephane Eranian <eranian@google.com>
---
tools/perf/builtin-record.c | 87 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 87 insertions(+)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index da1384012505..45bb565e0bb1 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -29,6 +29,9 @@
#include <sched.h>
#include <sys/mman.h>
+/* mmap file big chunks at a time */
+#define MMAP_OUTPUT_SIZE (64*1024*1024)
+
#ifndef HAVE_ON_EXIT
#ifndef ATEXIT_MAX
#define ATEXIT_MAX 32
@@ -64,6 +67,14 @@ static void __handle_on_exit_funcs(void)
struct perf_record {
struct perf_tool tool;
struct perf_record_opts opts;
+
+ /* for MMAP based file writes */
+ void *mmap_addr;
+ u64 bytes_at_mmap_start; /* bytes in file when mmap use starts */
+ u64 mmap_offset; /* current location within mmap */
+ size_t mmap_size; /* size of mmap segments */
+ bool use_mmap;
+
u64 bytes_written;
const char *output_name;
struct perf_evlist *evlist;
@@ -82,8 +93,66 @@ static void advance_output(struct perf_record *rec, size_t size)
rec->bytes_written += size;
}
+static int do_mmap_output(struct perf_record *rec, void *buf, size_t size)
+{
+ u64 remaining;
+ off_t offset;
+
+ if (rec->mmap_addr == NULL) {
+do_mmap:
+ offset = rec->bytes_at_mmap_start + rec->bytes_written;
+ if (offset < (ssize_t) rec->mmap_size) {
+ rec->mmap_offset = offset;
+ offset = 0;
+ } else
+ rec->mmap_offset = 0;
+
+ rec->mmap_addr = mmap(NULL, rec->mmap_size,
+ PROT_WRITE | PROT_READ,
+ MAP_SHARED,
+ rec->output,
+ offset);
+
+ if (rec->mmap_addr == MAP_FAILED) {
+ pr_err("mmap failed: %d: %s\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* expand file to include this mmap segment */
+ if (ftruncate(rec->output, offset + rec->mmap_size) != 0) {
+ pr_err("ftruncate failed\n");
+ return -1;
+ }
+ }
+
+ remaining = rec->mmap_size - rec->mmap_offset;
+
+ if (size > remaining) {
+ memcpy(rec->mmap_addr + rec->mmap_offset, buf, remaining);
+ rec->bytes_written += remaining;
+
+ size -= remaining;
+ buf += remaining;
+
+ msync(rec->mmap_addr, rec->mmap_size, MS_ASYNC);
+ munmap(rec->mmap_addr, rec->mmap_size);
+ goto do_mmap;
+ }
+
+ if (size) {
+ memcpy(rec->mmap_addr + rec->mmap_offset, buf, size);
+ rec->bytes_written += size;
+ rec->mmap_offset += size;
+ }
+
+ return 0;
+}
+
static int write_output(struct perf_record *rec, void *buf, size_t size)
{
+ if (rec->use_mmap)
+ return do_mmap_output(rec, buf, size);
+
while (size) {
int ret = write(rec->output, buf, size);
@@ -546,6 +615,11 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
if (forks)
perf_evlist__start_workload(evsel_list);
+ if (!rec->opts.pipe_output && stat(output_name, &st) == 0) {
+ rec->use_mmap = true;
+ rec->bytes_at_mmap_start = st.st_size - rec->bytes_written;
+ }
+
for (;;) {
int hits = rec->samples;
@@ -572,6 +646,18 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
}
}
+ if (rec->use_mmap) {
+ off_t len = rec->bytes_at_mmap_start + rec->bytes_written;
+
+ rec->use_mmap = false;
+ msync(rec->mmap_addr, rec->mmap_size, MS_ASYNC);
+ munmap(rec->mmap_addr, rec->mmap_size);
+ rec->mmap_addr = NULL;
+
+ if (ftruncate(rec->output, len) != 0)
+ pr_err("ftruncate failed\n");
+ }
+
if (quiet || signr == SIGUSR1)
return 0;
@@ -804,6 +890,7 @@ static struct perf_record record = {
.uses_mmap = true,
},
},
+ .mmap_size = MMAP_OUTPUT_SIZE,
};
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
--
1.7.10.1
next prev parent reply other threads:[~2013-10-09 3:27 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-10-09 3:26 [PATCH 0/3] perf trace enhancements David Ahern
2013-10-09 3:26 ` [PATCH 1/3] perf util: Add findnew method to intlist - v2 David Ahern
2013-10-15 5:32 ` [tip:perf/core] perf util: Add findnew method to intlist tip-bot for David Ahern
2013-10-09 3:26 ` [PATCH 2/3] perf trace: Add summary option to dump syscall statistics David Ahern
2013-10-09 13:16 ` Jiri Olsa
2013-10-15 5:32 ` [tip:perf/core] " tip-bot for David Ahern
2013-10-09 3:26 ` David Ahern [this message]
2013-10-09 5:59 ` [PATCH 3/3] perf record: mmap output file Ingo Molnar
2013-10-15 14:04 ` David Ahern
2013-10-15 14:32 ` Arnaldo Carvalho de Melo
2013-10-15 14:38 ` Peter Zijlstra
2013-10-15 15:27 ` Ingo Molnar
2013-10-15 15:29 ` Peter Zijlstra
2013-10-15 15:30 ` David Ahern
2013-10-15 16:06 ` Ingo Molnar
2013-10-15 16:16 ` David Ahern
2013-10-16 7:11 ` Ingo Molnar
2013-10-15 16:37 ` Peter Zijlstra
2013-10-09 7:14 ` Mike Galbraith
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1381289214-24885-4-git-send-email-dsahern@gmail.com \
--to=dsahern@gmail.com \
--cc=acme@ghostprotocols.net \
--cc=efault@gmx.de \
--cc=eranian@google.com \
--cc=fweisbec@gmail.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox