From: Jiri Olsa <jolsa@kernel.org>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: lkml <linux-kernel@vger.kernel.org>,
Ingo Molnar <mingo@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
David Ahern <dsahern@gmail.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 5/7] perf tools: Optimize sample parsing for ordered events
Date: Tue, 31 Oct 2017 10:29:45 +0100 [thread overview]
Message-ID: <20171031092947.19410-6-jolsa@kernel.org> (raw)
In-Reply-To: <20171031092947.19410-1-jolsa@kernel.org>
Currently when using ordered events we parse the sample
twice (the perf_evlist__parse_sample function). Once
before we queue the sample for sorting:
perf_session__process_event
perf_evlist__parse_sample(sample)
perf_session__queue_event(sample.time)
And then when we deliver the sorted sample:
ordered_events__deliver_event
perf_evlist__parse_sample
perf_session__deliver_event
We can skip the initial full sample parsing by using
perf_evlist__parse_sample_timestamp function, which
got introduced earlier. The new path looks like:
perf_session__process_event
perf_evlist__parse_sample_timestamp
perf_session__queue_event
ordered_events__deliver_event
perf_session__deliver_event
perf_evlist__parse_sample
It saves some instructions and is slightly faster:
Before:
Performance counter stats for './perf.old report --stdio' (5 runs):
64,396,007,225 cycles:u ( +- 0.97% )
105,882,112,735 instructions:u # 1.64 insn per cycle ( +- 0.00% )
21.618103465 seconds time elapsed ( +- 1.12% )
After:
Performance counter stats for './perf report --stdio' (5 runs):
60,567,807,182 cycles:u ( +- 0.40% )
104,853,333,514 instructions:u # 1.73 insn per cycle ( +- 0.00% )
20.168895243 seconds time elapsed ( +- 0.32% )
Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/perf/builtin-kvm.c | 8 ++++----
tools/perf/util/session.c | 41 ++++++++++++++++++-----------------------
2 files changed, 22 insertions(+), 27 deletions(-)
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 293589a9adab..24733aea25cb 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -740,20 +740,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
u64 *mmap_time)
{
union perf_event *event;
- struct perf_sample sample;
+ u64 timestamp;
s64 n = 0;
int err;
*mmap_time = ULLONG_MAX;
while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
- err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
+ err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, ×tamp);
if (err) {
perf_evlist__mmap_consume(kvm->evlist, idx);
pr_err("Failed to parse sample\n");
return -1;
}
- err = perf_session__queue_event(kvm->session, event, sample.time, 0);
+ err = perf_session__queue_event(kvm->session, event, timestamp, 0);
/*
* FIXME: Here we can't consume the event, as perf_session__queue_event will
* point to it, and it'll get possibly overwritten by the kernel.
@@ -767,7 +767,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
/* save time stamp of our first sample for this mmap */
if (n == 0)
- *mmap_time = sample.time;
+ *mmap_time = timestamp;
/* limit events per mmap handled all at once */
n++;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index e591006c0d56..91e787a4406d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -26,7 +26,6 @@
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_sample *sample,
struct perf_tool *tool,
u64 file_offset);
@@ -106,17 +105,10 @@ static void perf_session__set_comm_exec(struct perf_session *session)
static int ordered_events__deliver_event(struct ordered_events *oe,
struct ordered_event *event)
{
- struct perf_sample sample;
struct perf_session *session = container_of(oe, struct perf_session,
ordered_events);
- int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample);
-
- if (ret) {
- pr_err("Can't parse sample, err = %d\n", ret);
- return ret;
- }
- return perf_session__deliver_event(session, event->event, &sample,
+ return perf_session__deliver_event(session, event->event,
session->tool, event->file_offset);
}
@@ -1327,20 +1319,26 @@ static int machines__deliver_event(struct machines *machines,
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_sample *sample,
struct perf_tool *tool,
u64 file_offset)
{
+ struct perf_sample sample;
int ret;
- ret = auxtrace__process_event(session, event, sample, tool);
+ ret = perf_evlist__parse_sample(session->evlist, event, &sample);
+ if (ret) {
+ pr_err("Can't parse sample, err = %d\n", ret);
+ return ret;
+ }
+
+ ret = auxtrace__process_event(session, event, &sample, tool);
if (ret < 0)
return ret;
if (ret > 0)
return 0;
return machines__deliver_event(&session->machines, session->evlist,
- event, sample, tool, file_offset);
+ event, &sample, tool, file_offset);
}
static s64 perf_session__process_user_event(struct perf_session *session,
@@ -1494,7 +1492,6 @@ static s64 perf_session__process_event(struct perf_session *session,
{
struct perf_evlist *evlist = session->evlist;
struct perf_tool *tool = session->tool;
- struct perf_sample sample;
int ret;
if (session->header.needs_swap)
@@ -1508,21 +1505,19 @@ static s64 perf_session__process_event(struct perf_session *session,
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
return perf_session__process_user_event(session, event, file_offset);
- /*
- * For all kernel events we get the sample data
- */
- ret = perf_evlist__parse_sample(evlist, event, &sample);
- if (ret)
- return ret;
-
if (tool->ordered_events) {
- ret = perf_session__queue_event(session, event, sample.time, file_offset);
+ u64 timestamp;
+
+ ret = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp);
+ if (ret)
+ return ret;
+
+ ret = perf_session__queue_event(session, event, timestamp, file_offset);
if (ret != -ETIME)
return ret;
}
- return perf_session__deliver_event(session, event, &sample, tool,
- file_offset);
+ return perf_session__deliver_event(session, event, tool, file_offset);
}
void perf_event_header__bswap(struct perf_event_header *hdr)
--
2.13.6
next prev parent reply other threads:[~2017-10-31 9:31 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-10-31 9:29 [PATCH 0/7] perf tools: Small sample parsing speedup Jiri Olsa
2017-10-31 9:29 ` [PATCH 1/7] perf tools: Reset cursor arg instead of callchain_cursor Jiri Olsa
2017-10-31 9:29 ` [PATCH 2/7] perf tools: Centralize perf_sample initialization Jiri Olsa
2017-10-31 9:29 ` [PATCH 3/7] perf tools: Add perf_evlist__parse_sample_timestamp function Jiri Olsa
2017-10-31 9:29 ` [PATCH 4/7] perf tools: Pass timestamp arg in perf_session__queue_event Jiri Olsa
2017-10-31 9:29 ` Jiri Olsa [this message]
2017-10-31 9:40 ` [PATCH 5/7] perf tools: Optimize sample parsing for ordered events Ingo Molnar
2017-10-31 9:29 ` [PATCH 6/7] perf tools: Remove perf_tool from event_op2 Jiri Olsa
2017-10-31 9:29 ` [PATCH 7/7] perf tools: Remove perf_tool from event_op3 Jiri Olsa
2017-11-01 8:39 ` [PATCH 0/7] perf tools: Small sample parsing speedup Namhyung Kim
2017-11-01 11:50 ` Jiri Olsa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171031092947.19410-6-jolsa@kernel.org \
--to=jolsa@kernel.org \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@kernel.org \
--cc=dsahern@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.