[PATCH 05/25] perf stat: Add support to measure SMI cost

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org, Kan Liang <Kan.liang@intel.com>,
	Andi Kleen <ak@linux.intel.com>, Kan Liang <kan.liang@intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Robert Elliott <elliott@hpe.com>,
	Stephane Eranian <eranian@google.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 05/25] perf stat: Add support to measure SMI cost
Date: Wed, 21 Jun 2017 15:02:25 -0300	[thread overview]
Message-ID: <20170621180245.23134-6-acme@kernel.org> (raw)
In-Reply-To: <20170621180245.23134-1-acme@kernel.org>

From: Kan Liang <Kan.liang@intel.com>

Implementing a new --smi-cost mode in perf stat to measure SMI cost.

During the measurement, /sys/devices/cpu/freeze_on_smi will be set.

The measurement can be done with one counter (unhalted core cycles), and
two free running MSR counters (IA32_APERF and SMI_COUNT).

In practice, the percentage of SMI core cycles should be more useful
than the absolute value. So the output will be the percentage of SMI core
cycles and SMI#. metric_only will be set by default.

SMI cycles% = (aperf - unhalted core cycles) / aperf

Here is an example output.

 Performance counter stats for 'sudo echo ':

SMI cycles%          SMI#
    0.1%              1

       0.010858678 seconds time elapsed

Users who want to get the actual values can additionally apply
--no-metric-only.

Signed-off-by: Kan Liang <Kan.liang@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert Elliott <elliott@hpe.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1495825538-5230-3-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-stat.txt | 14 ++++++++++
 tools/perf/builtin-stat.c              | 49 ++++++++++++++++++++++++++++++++++
 tools/perf/util/stat-shadow.c          | 33 +++++++++++++++++++++++
 tools/perf/util/stat.c                 |  2 ++
 tools/perf/util/stat.h                 |  2 ++
 5 files changed, 100 insertions(+)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index bd0e4417f2be..698076313606 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -239,6 +239,20 @@ taskset.
 --no-merge::
 Do not merge results from same PMUs.
 
+--smi-cost::
+Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
+
+During the measurement, /sys/devices/cpu/freeze_on_smi will be set to
+freeze core counters on SMI.
+The aperf counter will not be affected by the setting.
+The cost of SMI can be measured by (aperf - unhalted core cycles).
+
+In practice, the percentage of SMI cycles is very useful for performance
+oriented analysis. --metric-only will be applied by default.
+The output is SMI cycles%, equal to (aperf - unhalted core cycles) / aperf
+
+Users who want to get the actual values can apply --no-metric-only.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ad9324d1daf9..324363054c3f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -86,6 +86,7 @@
 #define DEFAULT_SEPARATOR	" "
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
+#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
 
 static void print_counters(struct timespec *ts, int argc, const char **argv);
 
@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
 	NULL,
 };
 
+static const char *smi_cost_attrs = {
+	"{"
+	"msr/aperf/,"
+	"msr/smi/,"
+	"cycles"
+	"}"
+};
+
 static struct perf_evlist	*evsel_list;
 
 static struct target target = {
@@ -137,6 +146,8 @@ static bool			null_run			=  false;
 static int			detailed_run			=  0;
 static bool			transaction_run;
 static bool			topdown_run			= false;
+static bool			smi_cost			= false;
+static bool			smi_reset			= false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
 static const char		*csv_sep			= NULL;
@@ -1782,6 +1793,8 @@ static const struct option stat_options[] = {
 			"Only print computed metrics. No raw values", enable_metric_only),
 	OPT_BOOLEAN(0, "topdown", &topdown_run,
 			"measure topdown level 1 statistics"),
+	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
+			"measure SMI cost"),
 	OPT_END()
 };
 
@@ -2160,6 +2173,39 @@ static int add_default_attributes(void)
 		return 0;
 	}
 
+	if (smi_cost) {
+		int smi;
+
+		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
+			fprintf(stderr, "freeze_on_smi is not supported.\n");
+			return -1;
+		}
+
+		if (!smi) {
+			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
+				fprintf(stderr, "Failed to set freeze_on_smi.\n");
+				return -1;
+			}
+			smi_reset = true;
+		}
+
+		if (pmu_have_event("msr", "aperf") &&
+		    pmu_have_event("msr", "smi")) {
+			if (!force_metric_only)
+				metric_only = true;
+			err = parse_events(evsel_list, smi_cost_attrs, NULL);
+		} else {
+			fprintf(stderr, "To measure SMI cost, it needs "
+				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+			return -1;
+		}
+		if (err) {
+			fprintf(stderr, "Cannot set up SMI cost events\n");
+			return -1;
+		}
+		return 0;
+	}
+
 	if (topdown_run) {
 		char *str = NULL;
 		bool warn = false;
@@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
 	perf_stat__exit_aggr_mode();
 	perf_evlist__free_stats(evsel_list);
 out:
+	if (smi_cost && smi_reset)
+		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
+
 	perf_evlist__delete(evsel_list);
 	return status;
 }
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ac10cc675d39..719d6cb86952 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
 static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
 static struct rblist runtime_saved_values;
 static bool have_frontend_stalled;
 
@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
 	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
 	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
 	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
+	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
+	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
 
 	next = rb_first(&runtime_saved_values.entries);
 	while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
 		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, SMI_NUM))
+		update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, APERF))
+		update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
 
 	if (counter->collect_stat) {
 		struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
 	return sanitize_val(1.0 - sum);
 }
 
+static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+			   struct perf_stat_output_ctx *out)
+{
+	double smi_num, aperf, cycles, cost = 0.0;
+	int ctx = evsel_context(evsel);
+	const char *color = NULL;
+
+	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
+	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
+	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if ((cycles == 0) || (aperf == 0))
+		return;
+
+	if (smi_num)
+		cost = (aperf - cycles) / aperf * 100.00;
+
+	if (cost > 10)
+		color = PERF_COLOR_RED;
+	out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+	out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+
 void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 				   double avg, int cpu,
 				   struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
 		}
 		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
 		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+		print_smi_cost(cpu, evsel, out);
 	} else {
 		print_metric(ctxp, NULL, NULL, NULL, 0);
 	}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c58174443dc1..53b9a994a3dc 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
 	ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
 	ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
 	ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+	ID(SMI_NUM, msr/smi/),
+	ID(APERF, msr/aperf/),
 };
 #undef ID
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0a65ae23f495..7522bf10b03e 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
 	PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
 	PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+	PERF_STAT_EVSEL_ID__SMI_NUM,
+	PERF_STAT_EVSEL_ID__APERF,
 	PERF_STAT_EVSEL_ID__MAX,
 };
 
-- 
2.9.4

next prev parent reply	other threads:[~2017-06-21 18:03 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-21 18:02 [GIT PULL 00/25] perf/core improvements and fixes Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 01/25] perf evsel: Adopt find_process() Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 02/25] perf tools: Do parameter validation earlier on fetch_kernel_version() Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 03/25] perf tools: Remove unused _ALL_SOURCE define Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 04/25] tools lib api fs: Add sysfs__write_int function Arnaldo Carvalho de Melo
2017-06-21 18:02 ` Arnaldo Carvalho de Melo [this message]
2017-06-21 18:02 ` [PATCH 06/25] perf unwind: Support for powerpc Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 07/25] perf intel-pt: Move decoder error setting into one condition Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 08/25] perf intel-pt: Improve sample timestamp Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 09/25] perf intel-pt: Fix missing stack clear Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 10/25] perf intel-pt: Ensure IP is zero when state is INTEL_PT_STATE_NO_IP Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 11/25] perf intel-pt: Fix last_ip usage Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 12/25] perf intel-pt: Ensure never to set 'last_ip' when packet 'count' is zero Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 13/25] perf intel-pt: Use FUP always when scanning for an IP Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 14/25] perf intel-pt: Clear FUP flag on error Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 15/25] perf intel-pt: Add missing __fallthrough Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 16/25] perf intel-pt: Allow decoding with branch tracing disabled Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 17/25] perf intel-pt: Add default config for pass-through branch enable Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 18/25] perf intel-pt: Add documentation for new config terms Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 19/25] perf intel-pt: Add decoder support for ptwrite and power event packets Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 20/25] perf intel-pt: Add reserved byte to CBR packet payload Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 21/25] perf intel-pt: Add decoder support for CBR events Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 22/25] perf intel-pt: Remove redundant initial_skip checks Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 23/25] perf intel-pt: Fix transactions_sample_type Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 24/25] perf tools: Fix message because cpu list option is -C not -c Arnaldo Carvalho de Melo
2017-06-21 18:02 ` [PATCH 25/25] perf script: Fix message because field list option is -F not -f Arnaldo Carvalho de Melo
2017-06-21 18:13 ` [GIT PULL 00/25] perf/core improvements and fixes Ingo Molnar

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:bd0e4417f2b dfblob:69807631360 dfblob:ad9324d1daf
dfblob:324363054c3 dfblob:ac10cc675d3 dfblob:719d6cb8695
dfblob:c58174443dc dfblob:53b9a994a3d dfblob:0a65ae23f49
dfblob:7522bf10b03 )
 OR (
bs:"[PATCH 05/25] perf stat: Add support to measure SMI cost" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170621180245.23134-6-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=Kan.liang@intel.com \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=elliott@hpe.com \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.