public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Jiri Olsa <jolsa@kernel.org>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: lkml <linux-kernel@vger.kernel.org>,
	Ingo Molnar <mingo@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	David Ahern <dsahern@gmail.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 5/7] perf tools: Optimize sample parsing for ordered events
Date: Tue, 31 Oct 2017 10:29:45 +0100	[thread overview]
Message-ID: <20171031092947.19410-6-jolsa@kernel.org> (raw)
In-Reply-To: <20171031092947.19410-1-jolsa@kernel.org>

Currently, when using ordered events, we parse each sample
twice with the perf_evlist__parse_sample function. First,
before we queue the sample for sorting:

  perf_session__process_event
    perf_evlist__parse_sample(sample)
    perf_session__queue_event(sample.time)

And then when we deliver the sorted sample:

  ordered_events__deliver_event
    perf_evlist__parse_sample
    perf_session__deliver_event

We can skip the initial full sample parsing by using the
perf_evlist__parse_sample_timestamp function, which was
introduced earlier in this series. The new path looks like:

  perf_session__process_event
    perf_evlist__parse_sample_timestamp
    perf_session__queue_event

  ordered_events__deliver_event
    perf_session__deliver_event
      perf_evlist__parse_sample

It saves some instructions and is slightly faster:

Before:
 Performance counter stats for './perf.old report --stdio' (5 runs):

    64,396,007,225      cycles:u                                                      ( +-  0.97% )
   105,882,112,735      instructions:u            #    1.64  insn per cycle           ( +-  0.00% )

      21.618103465 seconds time elapsed                                          ( +-  1.12% )

After:
 Performance counter stats for './perf report --stdio' (5 runs):

    60,567,807,182      cycles:u                                                      ( +-  0.40% )
   104,853,333,514      instructions:u            #    1.73  insn per cycle           ( +-  0.00% )

      20.168895243 seconds time elapsed                                          ( +-  0.32% )

Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 tools/perf/builtin-kvm.c  |  8 ++++----
 tools/perf/util/session.c | 41 ++++++++++++++++++-----------------------
 2 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 293589a9adab..24733aea25cb 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -740,20 +740,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
 				   u64 *mmap_time)
 {
 	union perf_event *event;
-	struct perf_sample sample;
+	u64 timestamp;
 	s64 n = 0;
 	int err;
 
 	*mmap_time = ULLONG_MAX;
 	while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
-		err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
+		err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp);
 		if (err) {
 			perf_evlist__mmap_consume(kvm->evlist, idx);
 			pr_err("Failed to parse sample\n");
 			return -1;
 		}
 
-		err = perf_session__queue_event(kvm->session, event, sample.time, 0);
+		err = perf_session__queue_event(kvm->session, event, timestamp, 0);
 		/*
 		 * FIXME: Here we can't consume the event, as perf_session__queue_event will
 		 *        point to it, and it'll get possibly overwritten by the kernel.
@@ -767,7 +767,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
 
 		/* save time stamp of our first sample for this mmap */
 		if (n == 0)
-			*mmap_time = sample.time;
+			*mmap_time = timestamp;
 
 		/* limit events per mmap handled all at once */
 		n++;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index e591006c0d56..91e787a4406d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -26,7 +26,6 @@
 
 static int perf_session__deliver_event(struct perf_session *session,
 				       union perf_event *event,
-				       struct perf_sample *sample,
 				       struct perf_tool *tool,
 				       u64 file_offset);
 
@@ -106,17 +105,10 @@ static void perf_session__set_comm_exec(struct perf_session *session)
 static int ordered_events__deliver_event(struct ordered_events *oe,
 					 struct ordered_event *event)
 {
-	struct perf_sample sample;
 	struct perf_session *session = container_of(oe, struct perf_session,
 						    ordered_events);
-	int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample);
-
-	if (ret) {
-		pr_err("Can't parse sample, err = %d\n", ret);
-		return ret;
-	}
 
-	return perf_session__deliver_event(session, event->event, &sample,
+	return perf_session__deliver_event(session, event->event,
 					   session->tool, event->file_offset);
 }
 
@@ -1327,20 +1319,26 @@ static int machines__deliver_event(struct machines *machines,
 
 static int perf_session__deliver_event(struct perf_session *session,
 				       union perf_event *event,
-				       struct perf_sample *sample,
 				       struct perf_tool *tool,
 				       u64 file_offset)
 {
+	struct perf_sample sample;
 	int ret;
 
-	ret = auxtrace__process_event(session, event, sample, tool);
+	ret = perf_evlist__parse_sample(session->evlist, event, &sample);
+	if (ret) {
+		pr_err("Can't parse sample, err = %d\n", ret);
+		return ret;
+	}
+
+	ret = auxtrace__process_event(session, event, &sample, tool);
 	if (ret < 0)
 		return ret;
 	if (ret > 0)
 		return 0;
 
 	return machines__deliver_event(&session->machines, session->evlist,
-				       event, sample, tool, file_offset);
+				       event, &sample, tool, file_offset);
 }
 
 static s64 perf_session__process_user_event(struct perf_session *session,
@@ -1494,7 +1492,6 @@ static s64 perf_session__process_event(struct perf_session *session,
 {
 	struct perf_evlist *evlist = session->evlist;
 	struct perf_tool *tool = session->tool;
-	struct perf_sample sample;
 	int ret;
 
 	if (session->header.needs_swap)
@@ -1508,21 +1505,19 @@ static s64 perf_session__process_event(struct perf_session *session,
 	if (event->header.type >= PERF_RECORD_USER_TYPE_START)
 		return perf_session__process_user_event(session, event, file_offset);
 
-	/*
-	 * For all kernel events we get the sample data
-	 */
-	ret = perf_evlist__parse_sample(evlist, event, &sample);
-	if (ret)
-		return ret;
-
 	if (tool->ordered_events) {
-		ret = perf_session__queue_event(session, event, sample.time, file_offset);
+		u64 timestamp;
+
+		ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+		if (ret)
+			return ret;
+
+		ret = perf_session__queue_event(session, event, timestamp, file_offset);
 		if (ret != -ETIME)
 			return ret;
 	}
 
-	return perf_session__deliver_event(session, event, &sample, tool,
-					   file_offset);
+	return perf_session__deliver_event(session, event, tool, file_offset);
 }
 
 void perf_event_header__bswap(struct perf_event_header *hdr)
-- 
2.13.6

  parent reply	other threads:[~2017-10-31  9:31 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-31  9:29 [PATCH 0/7] perf tools: Small sample parsing speedup Jiri Olsa
2017-10-31  9:29 ` [PATCH 1/7] perf tools: Reset cursor arg instead of callchain_cursor Jiri Olsa
2017-10-31  9:29 ` [PATCH 2/7] perf tools: Centralize perf_sample initialization Jiri Olsa
2017-10-31  9:29 ` [PATCH 3/7] perf tools: Add perf_evlist__parse_sample_timestamp function Jiri Olsa
2017-10-31  9:29 ` [PATCH 4/7] perf tools: Pass timestamp arg in perf_session__queue_event Jiri Olsa
2017-10-31  9:29 ` Jiri Olsa [this message]
2017-10-31  9:40   ` [PATCH 5/7] perf tools: Optimize sample parsing for ordered events Ingo Molnar
2017-10-31  9:29 ` [PATCH 6/7] perf tools: Remove perf_tool from event_op2 Jiri Olsa
2017-10-31  9:29 ` [PATCH 7/7] perf tools: Remove perf_tool from event_op3 Jiri Olsa
2017-11-01  8:39 ` [PATCH 0/7] perf tools: Small sample parsing speedup Namhyung Kim
2017-11-01 11:50   ` Jiri Olsa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171031092947.19410-6-jolsa@kernel.org \
    --to=jolsa@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@kernel.org \
    --cc=dsahern@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox