* [PATCH 1/2] perf tools: Add perf_data__write implementation into perf_data_file object
2013-11-04 18:31 [PATCH 0/2] perf record: mmap output file - v3 Jiri Olsa
@ 2013-11-04 18:31 ` Jiri Olsa
2013-11-04 18:31 ` [PATCH 2/2] perf tools: Add perf_data_file__write mmap support Jiri Olsa
2013-11-06 17:51 ` [PATCH 0/2] perf record: mmap output file - v3 David Ahern
2 siblings, 0 replies; 6+ messages in thread
From: Jiri Olsa @ 2013-11-04 18:31 UTC (permalink / raw)
To: linux-kernel
Cc: Jiri Olsa, Ingo Molnar, Frederic Weisbecker, Peter Zijlstra,
Namhyung Kim, Mike Galbraith, Stephane Eranian, David Ahern,
Adrian Hunter
Add the perf_data_file__write implementation to the perf_data_file
object. The record command now uses this interface to store data.
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
---
tools/perf/builtin-record.c | 38 ++++++++++++++------------------------
tools/perf/util/data.c | 20 ++++++++++++++++++++
tools/perf/util/data.h | 15 ++++++++-------
3 files changed, 42 insertions(+), 31 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ab8d15e..5201677 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -82,24 +82,17 @@ static void advance_output(struct perf_record *rec, size_t size)
rec->bytes_written += size;
}
-static int write_output(struct perf_record *rec, void *buf, size_t size)
+static ssize_t perf_record__write(struct perf_record *rec,
+ void *buf, size_t size)
{
- struct perf_data_file *file = &rec->file;
-
- while (size) {
- int ret = write(file->fd, buf, size);
-
- if (ret < 0) {
- pr_err("failed to write perf data, error: %m\n");
- return -1;
- }
-
- size -= ret;
- buf += ret;
+ struct perf_session *session = rec->session;
+ ssize_t ret;
- rec->bytes_written += ret;
- }
+ ret = perf_data_file__write(session->file, buf, size);
+ if (ret < 0)
+ return -1;
+ rec->bytes_written += ret;
return 0;
}
@@ -109,10 +102,7 @@ static int process_synthesized_event(struct perf_tool *tool,
struct machine *machine __maybe_unused)
{
struct perf_record *rec = container_of(tool, struct perf_record, tool);
- if (write_output(rec, event, event->header.size) < 0)
- return -1;
-
- return 0;
+ return perf_record__write(rec, event, event->header.size);
}
static int perf_record__mmap_read(struct perf_record *rec,
@@ -137,7 +127,7 @@ static int perf_record__mmap_read(struct perf_record *rec,
size = md->mask + 1 - (old & md->mask);
old += size;
- if (write_output(rec, buf, size) < 0) {
+ if (perf_record__write(rec, buf, size) < 0) {
rc = -1;
goto out;
}
@@ -147,7 +137,7 @@ static int perf_record__mmap_read(struct perf_record *rec,
size = head - old;
old += size;
- if (write_output(rec, buf, size) < 0) {
+ if (perf_record__write(rec, buf, size) < 0) {
rc = -1;
goto out;
}
@@ -322,8 +312,8 @@ static struct perf_event_header finished_round_event = {
static int perf_record__mmap_read_all(struct perf_record *rec)
{
- int i;
- int rc = 0;
+ struct perf_session *session = rec->session;
+ int i, rc = 0;
for (i = 0; i < rec->evlist->nr_mmaps; i++) {
if (rec->evlist->mmap[i].base) {
@@ -335,7 +325,7 @@ static int perf_record__mmap_read_all(struct perf_record *rec)
}
if (perf_header__has_feat(&rec->session->header, HEADER_TRACING_DATA))
- rc = write_output(rec, &finished_round_event,
+ rc = perf_data_file__write(session->file, &finished_round_event,
sizeof(finished_round_event));
out:
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index 7d09faf..cce1256 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -118,3 +118,23 @@ void perf_data_file__close(struct perf_data_file *file)
{
close(file->fd);
}
+
+ssize_t perf_data_file__write(struct perf_data_file *file,
+ void *buf, size_t size)
+{
+ ssize_t total = size;
+
+ while (size) {
+ ssize_t ret = write(file->fd, buf, size);
+
+ if (ret < 0) {
+ pr_err("failed to write perf data, error: %m\n");
+ return -1;
+ }
+
+ size -= ret;
+ buf += ret;
+ }
+
+ return total;
+}
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 8c2df80..02c53dc 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -9,12 +9,12 @@ enum perf_data_mode {
};
struct perf_data_file {
- const char *path;
- int fd;
- bool is_pipe;
- bool force;
- unsigned long size;
- enum perf_data_mode mode;
+ const char *path;
+ int fd;
+ bool is_pipe;
+ bool force;
+ unsigned long size;
+ enum perf_data_mode mode;
};
static inline bool perf_data_file__is_read(struct perf_data_file *file)
@@ -44,5 +44,6 @@ static inline unsigned long perf_data_file__size(struct perf_data_file *file)
int perf_data_file__open(struct perf_data_file *file);
void perf_data_file__close(struct perf_data_file *file);
-
+ssize_t perf_data_file__write(struct perf_data_file *file,
+ void *buf, size_t size);
#endif /* __PERF_DATA_H */
--
1.7.11.7
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] perf tools: Add perf_data_file__write mmap support
2013-11-04 18:31 [PATCH 0/2] perf record: mmap output file - v3 Jiri Olsa
2013-11-04 18:31 ` [PATCH 1/2] perf tools: Add perf_data__write implementation into perf_data_file object Jiri Olsa
@ 2013-11-04 18:31 ` Jiri Olsa
2013-11-06 17:51 ` [PATCH 0/2] perf record: mmap output file - v3 David Ahern
2 siblings, 0 replies; 6+ messages in thread
From: Jiri Olsa @ 2013-11-04 18:31 UTC (permalink / raw)
To: linux-kernel
Cc: Jiri Olsa, Ingo Molnar, Frederic Weisbecker, Peter Zijlstra,
Namhyung Kim, Mike Galbraith, Stephane Eranian, David Ahern,
Adrian Hunter
When recording raw_syscalls for the entire system, e.g.,
perf record -e raw_syscalls:*,sched:sched_switch -a -- sleep 10
you end up with a self-reinforcing feedback loop: perf itself calls
write() fairly often, and each of those calls generates further events
to record. This patch handles the problem by mmap'ing the file in
chunks of 64M at a time and copying events from the event buffers to
the file, avoiding write system calls.
Before (with write syscall):
# time ./perf.old record -e raw_syscalls:*,sched:sched_switch -a -- sleep 10
[ perf record: Woken up 0 times to write data ]
[ perf record: Captured and wrote 914.717 MB perf.data (~39964591 samples) ]
real 0m11.390s
user 0m2.029s
sys 0m9.311s
After (using mmap):
# time ./perf record -e raw_syscalls:*,sched:sched_switch -a -- sleep 10
[ perf record: Woken up 74 times to write data ]
[ perf record: Captured and wrote 19.231 MB perf.data (~840219 samples) ]
real 0m10.182s
user 0m0.067s
sys 0m0.121s
In addition to the perf-trace benefits, using mmap lowers the overhead
of perf-record.
v3: moved David's code into perf_data_file object, also used
most of his changelog
Original-patch-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
---
tools/perf/builtin-record.c | 11 ++---
tools/perf/util/data.c | 100 +++++++++++++++++++++++++++++++++++++++++++-
tools/perf/util/data.h | 8 ++++
3 files changed, 112 insertions(+), 7 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5201677..45722fc 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -240,12 +240,8 @@ out:
static int process_buildids(struct perf_record *rec)
{
- struct perf_data_file *file = &rec->file;
struct perf_session *session = rec->session;
-
- u64 size = lseek(file->fd, 0, SEEK_CUR);
- if (size == 0)
- return 0;
+ u64 size = perf_data_file__size(&rec->file);
return __perf_session__process_events(session, rec->post_processing_offset,
size - rec->post_processing_offset,
@@ -535,6 +531,11 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
if (quiet || signr == SIGUSR1)
return 0;
+ if (perf_data_file__munmap(file)) {
+ pr_err("data file unmap failed\n");
+ goto out_delete_session;
+ }
+
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
/*
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index cce1256..af5d644 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -4,10 +4,13 @@
#include <sys/stat.h>
#include <unistd.h>
#include <string.h>
+#include <sys/mman.h>
#include "data.h"
#include "util.h"
+#define MMAP_WRITE_SIZE (64*1024*1024)
+
static bool check_pipe(struct perf_data_file *file)
{
struct stat st;
@@ -111,6 +114,9 @@ int perf_data_file__open(struct perf_data_file *file)
if (!file->path)
file->path = "perf.data";
+ if (!file->mmap_size)
+ file->mmap_size = MMAP_WRITE_SIZE;
+
return open_file(file);
}
@@ -119,8 +125,70 @@ void perf_data_file__close(struct perf_data_file *file)
close(file->fd);
}
-ssize_t perf_data_file__write(struct perf_data_file *file,
- void *buf, size_t size)
+static int do_mmap(struct perf_data_file *file, u64 offset)
+{
+ u64 mmap_size = file->mmap_size;
+
+ file->mmap_off = offset % mmap_size;
+ file->mmap_foff = (offset / mmap_size) * mmap_size;
+
+ file->mmap_addr = mmap(NULL, mmap_size,
+ PROT_WRITE | PROT_READ,
+ MAP_SHARED,
+ file->fd,
+ file->mmap_foff);
+
+ if (file->mmap_addr == MAP_FAILED) {
+ pr_err("mmap failed: %d: %s\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* Expand file to include this mmap segment. */
+ if (ftruncate(file->fd, file->mmap_foff + file->mmap_size) != 0) {
+ pr_err("ftruncate failed: %d: %s\n", errno, strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static ssize_t write_mmap(struct perf_data_file *file,
+ void *buf, size_t size)
+{
+ ssize_t total = size;
+
+ if (!file->mmap_addr) {
+ off_t offset = lseek(file->fd, 0, SEEK_CUR);
+ if (offset < 0)
+ return -1;
+
+ if (do_mmap(file, offset))
+ return -1;
+ }
+
+ while (size) {
+ u64 remain = file->mmap_size - file->mmap_off;
+
+ if (size > remain) {
+ memcpy(file->mmap_addr + file->mmap_off, buf, remain);
+ size -= remain;
+ buf += remain;
+
+ munmap(file->mmap_addr, file->mmap_size);
+ if (do_mmap(file, file->mmap_foff + file->mmap_size))
+ return -1;
+ } else {
+ memcpy(file->mmap_addr + file->mmap_off, buf, size);
+ file->mmap_off += size;
+ size = 0;
+ }
+ }
+
+ return total;
+}
+
+static ssize_t write_raw(struct perf_data_file *file,
+ void *buf, size_t size)
{
ssize_t total = size;
@@ -138,3 +206,31 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
return total;
}
+
+ssize_t perf_data_file__write(struct perf_data_file *file,
+ void *buf, size_t size)
+{
+ return file->is_pipe ? write_raw(file, buf, size) :
+ write_mmap(file, buf, size);
+}
+
+int perf_data_file__munmap(struct perf_data_file *file)
+{
+ if (file->mmap_addr) {
+ int ret;
+
+ munmap(file->mmap_addr, file->mmap_size);
+
+ file->mmap_addr = NULL;
+ file->size = file->mmap_foff + file->mmap_off;
+
+ ret = ftruncate(file->fd, file->size);
+ if (ret)
+ pr_err("ftruncate failed: %d: %s\n", errno,
+ strerror(errno));
+
+ return ret;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 02c53dc..de59ee0 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -2,6 +2,7 @@
#define __PERF_DATA_H
#include <stdbool.h>
+#include "types.h"
enum perf_data_mode {
PERF_DATA_MODE_WRITE,
@@ -15,6 +16,12 @@ struct perf_data_file {
bool force;
unsigned long size;
enum perf_data_mode mode;
+
+ /* for MMAP based file writes */
+ void *mmap_addr;
+ u64 mmap_off;
+ u64 mmap_foff;
+ u64 mmap_size;
};
static inline bool perf_data_file__is_read(struct perf_data_file *file)
@@ -46,4 +53,5 @@ int perf_data_file__open(struct perf_data_file *file);
void perf_data_file__close(struct perf_data_file *file);
ssize_t perf_data_file__write(struct perf_data_file *file,
void *buf, size_t size);
+int perf_data_file__munmap(struct perf_data_file *file);
#endif /* __PERF_DATA_H */
--
1.7.11.7
^ permalink raw reply related [flat|nested] 6+ messages in thread