From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
Jiri Olsa <jolsa@kernel.org>,
Adrian Hunter <adrian.hunter@intel.com>,
David Ahern <dsahern@gmail.com>,
Namhyung Kim <namhyung@kernel.org>,
Wang Nan <wangnan0@huawei.com>,
Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 78/83] perf tools: Optimize sample parsing for ordered events
Date: Fri, 17 Nov 2017 17:15:56 -0300 [thread overview]
Message-ID: <20171117201601.24110-79-acme@kernel.org> (raw)
In-Reply-To: <20171117201601.24110-1-acme@kernel.org>
From: Jiri Olsa <jolsa@kernel.org>
Currently when using ordered events we parse the sample twice (the
perf_evlist__parse_sample function). Once before we queue the sample for
sorting:
perf_session__process_event
perf_evlist__parse_sample(sample)
perf_session__queue_event(sample.time)
And then when we deliver the sorted sample:
ordered_events__deliver_event
perf_evlist__parse_sample
perf_session__deliver_event
We can skip the initial full sample parsing by using the
perf_evlist__parse_sample_timestamp function, which was introduced
earlier. The new path looks like:
perf_session__process_event
perf_evlist__parse_sample_timestamp
perf_session__queue_event
ordered_events__deliver_event
perf_session__deliver_event
perf_evlist__parse_sample
It saves some instructions and is slightly faster:
Before:
Performance counter stats for './perf.old report --stdio' (5 runs):
64,396,007,225 cycles:u ( +- 0.97% )
105,882,112,735 instructions:u # 1.64 insn per cycle ( +- 0.00% )
21.618103465 seconds time elapsed ( +- 1.12% )
After:
Performance counter stats for './perf report --stdio' (5 runs):
60,567,807,182 cycles:u ( +- 0.40% )
104,853,333,514 instructions:u # 1.73 insn per cycle ( +- 0.00% )
20.168895243 seconds time elapsed ( +- 0.32% )
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/builtin-kvm.c | 8 ++++----
tools/perf/util/session.c | 41 ++++++++++++++++++-----------------------
2 files changed, 22 insertions(+), 27 deletions(-)
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index cd253db6917f..597c7de9bec9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
u64 *mmap_time)
{
union perf_event *event;
- struct perf_sample sample;
+ u64 timestamp;
s64 n = 0;
int err;
*mmap_time = ULLONG_MAX;
while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
- err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
+ err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp);
if (err) {
perf_evlist__mmap_consume(kvm->evlist, idx);
pr_err("Failed to parse sample\n");
return -1;
}
- err = perf_session__queue_event(kvm->session, event, sample.time, 0);
+ err = perf_session__queue_event(kvm->session, event, timestamp, 0);
/*
* FIXME: Here we can't consume the event, as perf_session__queue_event will
* point to it, and it'll get possibly overwritten by the kernel.
@@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
/* save time stamp of our first sample for this mmap */
if (n == 0)
- *mmap_time = sample.time;
+ *mmap_time = timestamp;
/* limit events per mmap handled all at once */
n++;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 8976e417eab2..df2857137908 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -27,7 +27,6 @@
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_sample *sample,
struct perf_tool *tool,
u64 file_offset);
@@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session)
static int ordered_events__deliver_event(struct ordered_events *oe,
struct ordered_event *event)
{
- struct perf_sample sample;
struct perf_session *session = container_of(oe, struct perf_session,
ordered_events);
- int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample);
-
- if (ret) {
- pr_err("Can't parse sample, err = %d\n", ret);
- return ret;
- }
- return perf_session__deliver_event(session, event->event, &sample,
+ return perf_session__deliver_event(session, event->event,
session->tool, event->file_offset);
}
@@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines,
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
- struct perf_sample *sample,
struct perf_tool *tool,
u64 file_offset)
{
+ struct perf_sample sample;
int ret;
- ret = auxtrace__process_event(session, event, sample, tool);
+ ret = perf_evlist__parse_sample(session->evlist, event, &sample);
+ if (ret) {
+ pr_err("Can't parse sample, err = %d\n", ret);
+ return ret;
+ }
+
+ ret = auxtrace__process_event(session, event, &sample, tool);
if (ret < 0)
return ret;
if (ret > 0)
return 0;
return machines__deliver_event(&session->machines, session->evlist,
- event, sample, tool, file_offset);
+ event, &sample, tool, file_offset);
}
static s64 perf_session__process_user_event(struct perf_session *session,
@@ -1495,7 +1493,6 @@ static s64 perf_session__process_event(struct perf_session *session,
{
struct perf_evlist *evlist = session->evlist;
struct perf_tool *tool = session->tool;
- struct perf_sample sample;
int ret;
if (session->header.needs_swap)
@@ -1509,21 +1506,19 @@ static s64 perf_session__process_event(struct perf_session *session,
if (event->header.type >= PERF_RECORD_USER_TYPE_START)
return perf_session__process_user_event(session, event, file_offset);
- /*
- * For all kernel events we get the sample data
- */
- ret = perf_evlist__parse_sample(evlist, event, &sample);
- if (ret)
- return ret;
-
if (tool->ordered_events) {
- ret = perf_session__queue_event(session, event, sample.time, file_offset);
+ u64 timestamp;
+
+ ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+ if (ret)
+ return ret;
+
+ ret = perf_session__queue_event(session, event, timestamp, file_offset);
if (ret != -ETIME)
return ret;
}
- return perf_session__deliver_event(session, event, &sample, tool,
- file_offset);
+ return perf_session__deliver_event(session, event, tool, file_offset);
}
void perf_event_header__bswap(struct perf_event_header *hdr)
--
2.13.6
next prev parent reply other threads:[~2017-11-17 20:20 UTC|newest]
Thread overview: 87+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-17 20:14 [GIT PULL 00/83] perf/core improvements and fixes Arnaldo Carvalho de Melo
2017-11-17 20:14 ` Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 01/83] perf evlist: Set the correct idx when adding dummy events Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 02/83] perf record: Generate PERF_RECORD_{MMAP,COMM,EXEC} with --delay Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 03/83] perf annotate: Add annotation_line struct Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 04/83] perf annotate: Move line/offset into " Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 05/83] perf annotate: Move ipc/cycles " Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 06/83] perf annotate: Add symbol__annotate function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 07/83] perf annotate: Add struct annotate_args Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 08/83] perf annotate: Add arch into " Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 09/83] perf annotate: Add map " Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 10/83] perf annotate: Add offset/line/line_nr " Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 11/83] perf annotate: Add evsel into struct annotation_line_args Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 12/83] perf annotate: Add annotation_line__next function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 13/83] perf annotate: Add annotation_line__add function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 14/83] perf annotate: Move rb_node to struct annotation_line Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 15/83] perf annotate: Add annotation_line__(new|delete) functions Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 16/83] perf annotate: Add annotated_source__purge function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 17/83] perf annotate: Add samples into struct annotation_line Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 18/83] perf annotate: Add symbol__calc_percent function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 19/83] perf annotate: Add symbol__calc_lines function Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 20/83] perf annotate: Remove disasm__calc_percent() from disasm_line__print() Arnaldo Carvalho de Melo
2017-11-17 20:14 ` [PATCH 21/83] perf annotate: Remove disasm__calc_percent() from annotate_browser__calc_percent() Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 22/83] perf annotate: Remove disasm__calc_percent function Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 23/83] perf annotate: Remove struct source_line Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 24/83] perf annotate: Add annotation_line__print function Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 25/83] perf annotate: Factor annotation_line__print from disasm_line__print Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 26/83] perf annotate browser: Use samples data from struct annotation_line Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 27/83] perf annotate browser: Do not pass nr_events in disasm_rb_tree__insert Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 28/83] perf annotate browser: Rename struct browser_disasm_line to browser_line Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 29/83] perf annotate browser: Rename disasm_line__browser " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 30/83] perf annotate browser: Change selection to struct annotation_line Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 31/83] perf annotate browser: Change offsets " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 32/83] perf annotate browser: Use struct annotation_line in browser_line Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 33/83] perf annotate browser: Use struct annotation_line in find functions Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 34/83] perf annotate browser: Use struct annotation_line in browser top Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 35/83] perf annotate browser: Add disasm_line__write function Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 36/83] perf annotate: Align source and offset lines Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 37/83] tools headers: Synchronize kernel ABI headers wrt SPDX tags Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 38/83] perf record: Fix -c/-F options for cpu event aliases Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 39/83] perf evsel: Enable type checking for perf_evsel_config_term types Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 40/83] perf trace: Fix an exit code of trace__symbols_init Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 41/83] perf tools: Use shell function for perl cflags retrieval Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 42/83] perf evsel: Fix up leftover perf_evsel_stat usage via evsel->priv Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 43/83] perf tests: Add missing WRITE_ASS for new fields of perf_event_attr Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 44/83] perf script: Fix --per-event-dump for auxtrace synth evsels Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 45/83] perf vendor events powerpc: Update POWER9 events Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 46/83] perf buildid-cache: Update help text for purge command Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 47/83] perf tools: Document some missing perf.data headers Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 48/83] perf script: Allow printing period for non freq mode groups Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 49/83] perf top: Document missing options Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 50/83] perf top: Remove a duplicate word Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 51/83] perf tests: Set evlist of test__backward_ring_buffer() to !overwrite Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 52/83] perf tests: Set evlist of test__sw_clock_freq() " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 53/83] perf tests: Set evlist of test__basic_mmap() " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 54/83] perf tests: Set evlist of test__task_exit() " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 55/83] perf machine: Guard against NULL in machine__exit() Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 56/83] perf help: Fix a bug during strstart() conversion Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 57/83] perf annotate: Do not truncate instruction names at 6 chars Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 58/83] perf test shell: Fix check open filename arg using 'perf trace' on s390x Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 59/83] perf test shell: Fix test case probe libc's inet_pton " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 60/83] perf evlist: Add helper to check if attr.exclude_kernel is set in all evsels Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 61/83] perf report: Ignore kptr_restrict when not sampling the kernel Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 62/83] perf record: " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 63/83] perf top: " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 64/83] perf c2c: Fix spelling mistakes in browser help text Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 65/83] perf evsel: Say which PMU Hardware event doesn't support sampling/overflow-interrupts Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 66/83] perf lock: Document missing options Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 67/83] perf: Fix header.size for namespace events Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 68/83] perf inject: Document missing options Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 69/83] perf trace: Document missing option, colons Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 70/83] perf timechart: Document missing --force option Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 71/83] perf sched: " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 72/83] perf evlist: " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 73/83] perf buildid-cache: " Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 74/83] perf callchain: Reset cursor arg instead of callchain_cursor Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 75/83] perf evsel: Centralize perf_sample initialization Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 76/83] perf evlist: Add perf_evlist__parse_sample_timestamp function Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 77/83] perf ordered_events: Pass timestamp arg in perf_session__queue_event Arnaldo Carvalho de Melo
2017-11-17 20:15 ` Arnaldo Carvalho de Melo [this message]
2017-11-17 20:15 ` [PATCH 79/83] perf top: Fix window dimensions change handling Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 80/83] perf top: Use signal interface for SIGWINCH handler Arnaldo Carvalho de Melo
2017-11-17 20:15 ` [PATCH 81/83] perf top: Fix crash when annotating symbol Arnaldo Carvalho de Melo
2017-11-17 20:16 ` [PATCH 82/83] perf tools: Change (symbol|annotation)__calc_percent return type to void Arnaldo Carvalho de Melo
2017-11-17 20:16 ` [PATCH 83/83] perf tools: Move symbol__calc_percent() call to outside symbol__disassemble() Arnaldo Carvalho de Melo
2017-11-18 8:06 ` [GIT PULL 00/83] perf/core improvements and fixes Ingo Molnar
2017-11-18 8:06 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171117201601.24110-79-acme@kernel.org \
--to=acme@kernel.org \
--cc=acme@redhat.com \
--cc=adrian.hunter@intel.com \
--cc=dsahern@gmail.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=wangnan0@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.