From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
Jiri Olsa <jolsa@kernel.org>,
Adrian Hunter <adrian.hunter@intel.com>,
David Ahern <dsahern@gmail.com>,
Namhyung Kim <namhyung@kernel.org>,
Wang Nan <wangnan0@huawei.com>,
Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 78/83] perf tools: Optimize sample parsing for ordered events
Date: Fri, 17 Nov 2017 17:15:56 -0300 [thread overview]
Message-ID: <20171117201601.24110-79-acme@kernel.org> (raw)
In-Reply-To: <20171117201601.24110-1-acme@kernel.org>
From: Jiri Olsa <jolsa@kernel.org>
Currently when using ordered events we parse the sample twice (the
perf_evlist__parse_sample function). Once before we queue the sample for
sorting:
perf_session__process_event
perf_evlist__parse_sample(sample)
perf_session__queue_event(sample.time)
And then when we deliver the sorted sample:
ordered_events__deliver_event
perf_evlist__parse_sample
perf_session__deliver_event
We can skip the initial full sample parsing by using
perf_evlist__parse_sample_timestamp function, which got introduced
earlier. The new path looks like:
perf_session__process_event
perf_evlist__parse_sample_timestamp
perf_session__queue_event
ordered_events__deliver_event
perf_session__deliver_event
perf_evlist__parse_sample
It saves some instructions and is slightly faster:
Before:
Performance counter stats for './perf.old report --stdio' (5 runs):
64,396,007,225 cycles:u ( +- 0.97% )
105,882,112,735 instructions:u # 1.64 insn per cycle ( +- 0.00% )
21.618103465 seconds time elapsed ( +- 1.12% )
After:
Performance counter stats for './perf report --stdio' (5 runs):
60,567,807,182 cycles:u ( +- 0.40% )
104,853,333,514 instructions:u # 1.73 insn per cycle ( +- 0.00% )
20.168895243 seconds time elapsed ( +- 0.32% )
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/builtin-kvm.c | 8 ++++----
tools/perf/util/session.c | 41 ++++++++++++++++++-----------------------
2 files changed, 22 insertions(+), 27 deletions(-)
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index cd253db6917f..597c7de9bec9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
u64 *mmap_time)
{
union perf_event *event;
- struct perf_sample sample;
+ u64 timestamp;
s64 n = 0;
int err;
*mmap_time = ULLONG_MAX;
while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
- err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
+ err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp);
if (err) {
perf_evlist__mmap_consume(kvm->evlist, idx);
pr_err("Failed to parse sample\n");
return -1;
}
- err = perf_session__queue_event(kvm->session, event, sample.time, 0);
+ err = perf_session__queue_event(kvm->session, event, timestamp, 0);
/*
* FIXME: Here we can't consume the event, as perf_session__queue_event will
* point to it, and it'll get possibly overwritten by the kernel.
@@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
/* save time stamp of our first sample for this mmap */
if (n == 0)
- *mmap_time = sample.time;
+ *mmap_time = timestamp;
/* limit events per mmap handled all at once */
n++;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8976e417eab2..df2857137908 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -27,7 +27,6 @@
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_sample *sample,
struct perf_tool *tool,
u64 file_offset);
@@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session)
static int ordered_events__deliver_event(struct ordered_events *oe,
struct ordered_event *event)
{
- struct perf_sample sample;
struct perf_session *session = container_of(oe, struct perf_session,
ordered_events);
- int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample);
-
- if (ret) {
- pr_err("Can't parse sample, err = %d\n", ret);
- return ret;
- }
- return perf_session__deliver_event(session, event->event, &sample,
+ return perf_session__deliver_event(session, event->event,
session->tool, event->file_offset);
}
@@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines,
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_sample *sample,
struct perf_tool *tool,
u64 file_offset)
{
+ struct perf_sample sample;
int ret;
- ret = auxtrace__process_event(session, event, sample, tool);
+ ret = perf_evlist__parse_sample(session->evlist, event, &sample);
+ if (ret) {
+ pr_err("Can't parse sample, err = %d\n", ret);
+ return ret;
+ }
+
+ ret = auxtrace__process_event(session, event, &sample, tool);
if (ret < 0)
return ret;
if (ret > 0)
return 0;
return machines__deliver_event(&session->machines, session->evlist,
- event, sample, tool, file_offset);
+ event, &sample, tool, file_offset);
}
static s64 perf_session__process_user_event(struct perf_session *session,
@@ -1495,7 +1493,6 @@ static s64 perf_session__process_event(struct perf_session *session,
{
struct perf_evlist *evlist = session->evlist;
struct perf_tool *tool = session->tool;
- struct perf_sample sample;
int ret;
if (session->header.needs_swap)
@@ -1509,21 +1506,19 @@ static s64 perf_session__process_event(struct perf_session *session,
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
return perf_session__process_user_event(session, event, file_offset);
- /*
- * For all kernel events we get the sample data
- */
- ret = perf_evlist__parse_sample(evlist, event, &sample);
- if (ret)
- return ret;
-
if (tool->ordered_events) {
- ret = perf_session__queue_event(session, event, sample.time, file_offset);
+ u64 timestamp;
+
+ ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+ if (ret)
+ return ret;
+
+ ret = perf_session__queue_event(session, event, timestamp, file_offset);
if (ret != -ETIME)
return ret;
}
- return perf_session__deliver_event(session, event, &sample, tool,
- file_offset);
+ return perf_session__deliver_event(session, event, tool, file_offset);
}
void perf_event_header__bswap(struct perf_event_header *hdr)
--
2.13.6
next prev parent reply other threads:[~2017-11-17 20:20 UTC|newest]
Thread overview: 85+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-17 20:14 [GIT PULL 00/83] perf/core improvements and fixes Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 01/83] perf evlist: Set the correct idx when adding dummy events Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 02/83] perf record: Generate PERF_RECORD_{MMAP,COMM,EXEC} with --delay Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 03/83] perf annotate: Add annotation_line struct Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 04/83] perf annotate: Move line/offset into " Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 05/83] perf annotate: Move ipc/cycles " Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 06/83] perf annotate: Add symbol__annotate function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 07/83] perf annotate: Add struct annotate_args Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 08/83] perf annotate: Add arch into " Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 09/83] perf annotate: Add map " Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 10/83] perf annotate: Add offset/line/line_nr " Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 11/83] perf annotate: Add evsel into struct annotation_line_args Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 12/83] perf annotate: Add annotation_line__next function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 13/83] perf annotate: Add annotation_line__add function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 14/83] perf annotate: Move rb_node to struct annotation_line Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 15/83] perf annotate: Add annotation_line__(new|delete) functions Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 16/83] perf annotate: Add annotated_source__purge function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 17/83] perf annotate: Add samples into struct annotation_line Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 18/83] perf annotate: Add symbol__calc_percent function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 19/83] perf annotate: Add symbol__calc_lines function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 20/83] perf annotate: Remove disasm__calc_percent() from disasm_line__print() Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 21/83] perf annotate: Remove disasm__calc_percent() from annotate_browser__calc_percent() Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 22/83] perf annotate: Remove disasm__calc_percent function Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 23/83] perf annotate: Remove struct source_line Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 24/83] perf annotate: Add annotation_line__print function Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 25/83] perf annotate: Factor annotation_line__print from disasm_line__print Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 26/83] perf annotate browser: Use samples data from struct annotation_line Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 27/83] perf annotate browser: Do not pass nr_events in disasm_rb_tree__insert Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 28/83] perf annotate browser: Rename struct browser_disasm_line to browser_line Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 29/83] perf annotate browser: Rename disasm_line__browser " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 30/83] perf annotate browser: Change selection to struct annotation_line Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 31/83] perf annotate browser: Change offsets " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 32/83] perf annotate browser: Use struct annotation_line in browser_line Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 33/83] perf annotate browser: Use struct annotation_line in find functions Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 34/83] perf annotate browser: Use struct annotation_line in browser top Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 35/83] perf annotate browser: Add disasm_line__write function Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 36/83] perf annotate: Align source and offset lines Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 37/83] tools headers: Synchronize kernel ABI headers wrt SPDX tags Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 38/83] perf record: Fix -c/-F options for cpu event aliases Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 39/83] perf evsel: Enable type checking for perf_evsel_config_term types Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 40/83] perf trace: Fix an exit code of trace__symbols_init Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 41/83] perf tools: Use shell function for perl cflags retrieval Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 42/83] perf evsel: Fix up leftover perf_evsel_stat usage via evsel->priv Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 43/83] perf tests: Add missing WRITE_ASS for new fields of perf_event_attr Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 44/83] perf script: Fix --per-event-dump for auxtrace synth evsels Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 45/83] perf vendor events powerpc: Update POWER9 events Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 46/83] perf buildid-cache: Update help text for purge command Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 47/83] perf tools: Document some missing perf.data headers Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 48/83] perf script: Allow printing period for non freq mode groups Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 49/83] perf top: Document missing options Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 50/83] perf top: Remove a duplicate word Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 51/83] perf tests: Set evlist of test__backward_ring_buffer() to !overwrite Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 52/83] perf tests: Set evlist of test__sw_clock_freq() " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 53/83] perf tests: Set evlist of test__basic_mmap() " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 54/83] perf tests: Set evlist of test__task_exit() " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 55/83] perf machine: Guard against NULL in machine__exit() Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 56/83] perf help: Fix a bug during strstart() conversion Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 57/83] perf annotate: Do not truncate instruction names at 6 chars Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 58/83] perf test shell: Fix check open filename arg using 'perf trace' on s390x Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 59/83] perf test shell: Fix test case probe libc's inet_pton " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 60/83] perf evlist: Add helper to check if attr.exclude_kernel is set in all evsels Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 61/83] perf report: Ignore kptr_restrict when not sampling the kernel Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 62/83] perf record: " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 63/83] perf top: " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 64/83] perf c2c: Fix spelling mistakes in browser help text Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 65/83] perf evsel: Say which PMU Hardware event doesn't support sampling/overflow-interrupts Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 66/83] perf lock: Document missing options Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 67/83] perf: Fix header.size for namespace events Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 68/83] perf inject: Document missing options Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 69/83] perf trace: Document missing option, colons Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 70/83] perf timechart: Document missing --force option Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 71/83] perf sched: " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 72/83] perf evlist: " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 73/83] perf buildid-cache: " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 74/83] perf callchain: Reset cursor arg instead of callchain_cursor Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 75/83] perf evsel: Centralize perf_sample initialization Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 76/83] perf evlist: Add perf_evlist__parse_sample_timestamp function Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 77/83] perf ordered_events: Pass timestamp arg in perf_session__queue_event Arnaldo Carvalho de Melo
2017-11-17 20:15 ` Arnaldo Carvalho de Melo [this message]
2017-11-17 20:15 ` [PATCH 79/83] perf top: Fix window dimensions change handling Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 80/83] perf top: Use signal interface for SIGWINCH handler Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 81/83] perf top: Fix crash when annotating symbol Arnaldo Carvalho de Melo
2017-11-17 20:16 ` [PATCH 82/83] perf tools: Change (symbol|annotation)__calc_percent return type to void Arnaldo Carvalho de Melo
2017-11-17 20:16 ` [PATCH 83/83] perf tools: Move symbol__calc_percent() call to outside symbol__disassemble() Arnaldo Carvalho de Melo
2017-11-18 8:06 ` [GIT PULL 00/83] perf/core improvements and fixes Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171117201601.24110-79-acme@kernel.org \
--to=acme@kernel.org \
--cc=acme@redhat.com \
--cc=adrian.hunter@intel.com \
--cc=dsahern@gmail.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=wangnan0@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).