linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: David Ahern <dsahern@gmail.com>
To: acme@ghostprotocols.net, linux-kernel@vger.kernel.org
Cc: David Ahern <dsahern@gmail.com>, Ingo Molnar <mingo@kernel.org>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Mike Galbraith <efault@gmx.de>,
	Stephane Eranian <eranian@google.com>
Subject: [PATCH] perf record: mmap output file - RFC
Date: Sun, 22 Sep 2013 20:05:59 -0600	[thread overview]
Message-ID: <1379901959-5285-1-git-send-email-dsahern@gmail.com> (raw)

When recording raw_syscalls for the entire system, e.g.,
    perf record -e raw_syscalls:*,sched:sched_switch -a -- sleep 1

you end up with a self-reinforcing feedback loop: perf itself calls
write() fairly often, and each of those calls generates more syscall
events to record. This patch mmaps the output file in 64MB chunks and
copies events from the event buffers into the mapping, avoiding write
system calls.

Before (with write syscall):

perf record -o /tmp/perf.data -e raw_syscalls:*,sched:sched_switch -a -- sleep 1
[ perf record: Woken up 0 times to write data ]
[ perf record: Captured and wrote 81.843 MB /tmp/perf.data (~3575786 samples) ]

After (using mmap):

perf record -o /tmp/perf.data -e raw_syscalls:*,sched:sched_switch -a -- sleep 1
[ perf record: Woken up 31 times to write data ]
[ perf record: Captured and wrote 8.203 MB /tmp/perf.data (~358388 samples) ]

Before I get too far down this path I wanted to get comments on the approach.

Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Stephane Eranian <eranian@google.com>
---
 tools/perf/builtin-record.c |   87 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index da13840..45bb565 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -29,6 +29,9 @@
 #include <sched.h>
 #include <sys/mman.h>
 
+/* mmap the output file one big chunk at a time */
+#define MMAP_OUTPUT_SIZE   (64*1024*1024)
+
 #ifndef HAVE_ON_EXIT
 #ifndef ATEXIT_MAX
 #define ATEXIT_MAX 32
@@ -64,6 +67,14 @@ static void __handle_on_exit_funcs(void)
 struct perf_record {
 	struct perf_tool	tool;
 	struct perf_record_opts	opts;
+
+	/* for MMAP based file writes */
+	void			*mmap_addr;
+	u64			bytes_at_mmap_start; /* bytes in file when mmap use starts */
+	u64			mmap_offset;    /* current location within mmap */
+	size_t			mmap_size;      /* size of mmap segments */
+	bool			use_mmap;
+
 	u64			bytes_written;
 	const char		*output_name;
 	struct perf_evlist	*evlist;
@@ -82,8 +93,66 @@ static void advance_output(struct perf_record *rec, size_t size)
 	rec->bytes_written += size;
 }
 
+static int do_mmap_output(struct perf_record *rec, void *buf, size_t size)
+{
+	u64 remaining;
+	off_t offset;
+
+	if (rec->mmap_addr == NULL) {
+do_mmap:
+		offset = rec->bytes_at_mmap_start + rec->bytes_written;
+		if (offset < (ssize_t) rec->mmap_size) {
+			rec->mmap_offset = offset;
+			offset = 0;
+		} else
+			rec->mmap_offset = 0;
+
+		rec->mmap_addr = mmap(NULL, rec->mmap_size,
+				     PROT_WRITE | PROT_READ,
+				     MAP_SHARED,
+				     rec->output,
+				     offset);
+
+		if (rec->mmap_addr == MAP_FAILED) {
+			pr_err("mmap failed: %d: %s\n", errno, strerror(errno));
+			return -1;
+		}
+
+		/* expand file to include this mmap segment */
+		if (ftruncate(rec->output, offset + rec->mmap_size) != 0) {
+			pr_err("ftruncate failed\n");
+			return -1;
+		}
+	}
+
+	remaining = rec->mmap_size - rec->mmap_offset;
+
+	if (size > remaining) {
+		memcpy(rec->mmap_addr + rec->mmap_offset, buf, remaining);
+		rec->bytes_written += remaining;
+
+		size -= remaining;
+		buf  += remaining;
+
+		msync(rec->mmap_addr, rec->mmap_size, MS_ASYNC);
+		munmap(rec->mmap_addr, rec->mmap_size);
+		goto do_mmap;
+	}
+
+	if (size) {
+		memcpy(rec->mmap_addr + rec->mmap_offset, buf, size);
+		rec->bytes_written += size;
+		rec->mmap_offset += size;
+	}
+
+	return 0;
+}
+
 static int write_output(struct perf_record *rec, void *buf, size_t size)
 {
+	if (rec->use_mmap)
+		return do_mmap_output(rec, buf, size);
+
 	while (size) {
 		int ret = write(rec->output, buf, size);
 
@@ -546,6 +615,11 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 	if (forks)
 		perf_evlist__start_workload(evsel_list);
 
+	if (!rec->opts.pipe_output && stat(output_name, &st) == 0) {
+		rec->use_mmap = true;
+		rec->bytes_at_mmap_start = st.st_size - rec->bytes_written;
+	}
+
 	for (;;) {
 		int hits = rec->samples;
 
@@ -572,6 +646,18 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
 		}
 	}
 
+	if (rec->use_mmap) {
+		off_t len = rec->bytes_at_mmap_start + rec->bytes_written;
+
+		rec->use_mmap = false;
+		msync(rec->mmap_addr, rec->mmap_size, MS_ASYNC);
+		munmap(rec->mmap_addr, rec->mmap_size);
+		rec->mmap_addr = NULL;
+
+		if (ftruncate(rec->output, len) != 0)
+			pr_err("ftruncate failed\n");
+	}
+
 	if (quiet || signr == SIGUSR1)
 		return 0;
 
@@ -804,6 +890,7 @@ static struct perf_record record = {
 			.uses_mmap   = true,
 		},
 	},
+	.mmap_size = MMAP_OUTPUT_SIZE,
 };
 
 #define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "
-- 
1.7.10.1


             reply	other threads:[~2013-09-23  2:06 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-23  2:05 David Ahern [this message]
2013-09-26 17:51 ` [PATCH] perf record: mmap output file - RFC Jiri Olsa
2013-09-26 18:12   ` David Ahern
2013-09-26 18:23     ` Jiri Olsa
2013-09-26 23:17   ` David Ahern
2013-09-27  6:29     ` Ingo Molnar
2013-09-27  2:24   ` Namhyung Kim
2013-10-09 13:33 ` Jiri Olsa
2013-10-09 13:46   ` David Ahern

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1379901959-5285-1-git-send-email-dsahern@gmail.com \
    --to=dsahern@gmail.com \
    --cc=acme@ghostprotocols.net \
    --cc=efault@gmx.de \
    --cc=eranian@google.com \
    --cc=fweisbec@gmail.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).