All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] perf metric: Reduce multiplexing with duration_time
@ 2021-11-24  1:52 Ian Rogers
  2021-11-28 16:23 ` Jiri Olsa
  0 siblings, 1 reply; 5+ messages in thread
From: Ian Rogers @ 2021-11-24  1:52 UTC (permalink / raw)
  To: Andi Kleen, Jiri Olsa, Namhyung Kim, John Garry, Kajol Jain,
	Paul A . Clarke, Arnaldo Carvalho de Melo, Kan Liang,
	Peter Zijlstra, Ingo Molnar, Mark Rutland, Alexander Shishkin,
	linux-perf-users, linux-kernel
  Cc: eranian, Ian Rogers

It is common to use the same counters with and without
duration_time. The ID sharing code treats duration_time as if it
were a hardware event placed in the same group. This causes
unnecessary multiplexing such as in the following example where
l3_cache_access isn't shared:

$ perf stat -M l3 -a sleep 1

 Performance counter stats for 'system wide':

         3,117,007      l3_cache_miss             #    199.5 MB/s  l3_rd_bw
                                                  #     43.6 %  l3_hits
                                                  #     56.4 %  l3_miss               (50.00%)
         5,526,447      l3_cache_access                                               (50.00%)
         5,392,435      l3_cache_access           # 5389191.2 access/s  l3_access_rate  (50.00%)
     1,000,601,901 ns   duration_time

       1.000601901 seconds time elapsed

Fix this by placing duration_time in all groups unless metric
sharing has been disabled on the command line:

$ perf stat -M l3 -a sleep 1

 Performance counter stats for 'system wide':

         3,597,972      l3_cache_miss             #    230.3 MB/s  l3_rd_bw
                                                  #     48.0 %  l3_hits
                                                  #     52.0 %  l3_miss
         6,914,459      l3_cache_access           # 6909935.9 access/s  l3_access_rate
     1,000,654,579 ns   duration_time

       1.000654579 seconds time elapsed

$ perf stat --metric-no-merge -M l3 -a sleep 1

 Performance counter stats for 'system wide':

         3,501,834      l3_cache_miss             #     53.5 %  l3_miss               (24.99%)
         6,548,173      l3_cache_access                                               (24.99%)
         3,417,622      l3_cache_miss             #     45.7 %  l3_hits               (25.04%)
         6,294,062      l3_cache_access                                               (25.04%)
         5,923,238      l3_cache_access           # 5919688.1 access/s  l3_access_rate  (24.99%)
     1,000,599,683 ns   duration_time
         3,607,486      l3_cache_miss             #    230.9 MB/s  l3_rd_bw           (49.97%)

       1.000599683 seconds time elapsed

v2. Doesn't count duration_time in the metric_list_cmp function that
    sorts larger metrics first. Without this a metric with duration_time
    and an event is sorted the same as a metric with two events,
    possibly not allowing the first metric to share with the second.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/metricgroup.c | 42 +++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index fffe02aae3ed..51c99cb08abf 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1115,13 +1115,27 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe,
 	return ret;
 }
 
+/**
+ * metric_list_cmp - list_sort comparator that sorts metrics with more events to
+ *                   the front. duration_time is excluded from the count.
+ */
 static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l,
 			   const struct list_head *r)
 {
 	const struct metric *left = container_of(l, struct metric, nd);
 	const struct metric *right = container_of(r, struct metric, nd);
+	struct expr_id_data *data;
+	int left_count, right_count;
+
+	left_count = hashmap__size(left->pctx->ids);
+	if (!expr__get_id(left->pctx, "duration_time", &data))
+		left_count--;
+
+	right_count = hashmap__size(right->pctx->ids);
+	if (!expr__get_id(right->pctx, "duration_time", &data))
+		right_count--;
 
-	return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids);
+	return right_count - left_count;
 }
 
 /**
@@ -1299,14 +1313,16 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
 /**
  * parse_ids - Build the event string for the ids and parse them creating an
  *             evlist. The encoded metric_ids are decoded.
+ * @metric_no_merge: is metric sharing explicitly disabled.
  * @fake_pmu: used when testing metrics not supported by the current CPU.
  * @ids: the event identifiers parsed from a metric.
  * @modifier: any modifiers added to the events.
  * @has_constraint: false if events should be placed in a weak group.
  * @out_evlist: the created list of events.
  */
-static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
-		     const char *modifier, bool has_constraint, struct evlist **out_evlist)
+static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
+		     struct expr_parse_ctx *ids, const char *modifier,
+		     bool has_constraint, struct evlist **out_evlist)
 {
 	struct parse_events_error parse_error;
 	struct evlist *parsed_evlist;
@@ -1314,12 +1330,19 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
 	int ret;
 
 	*out_evlist = NULL;
-	if (hashmap__size(ids->ids) == 0) {
+	if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
 		char *tmp;
 		/*
-		 * No ids/events in the expression parsing context. Events may
-		 * have been removed because of constant evaluation, e.g.:
-		 *  event1 if #smt_on else 0
+		 * We may fail to share events between metrics because
+		 * duration_time isn't present in one metric. For example, a
+		 * ratio of cache misses doesn't need duration_time but the same
+		 * events may be used for a misses per second. Events without
+		 * sharing implies multiplexing, that is best avoided, so place
+		 * duration_time in every group.
+		 *
+		 * Also, there may be no ids/events in the expression parsing
+		 * context because of constant evaluation, e.g.:
+		 *    event1 if #smt_on else 0
 		 * Add a duration_time event to avoid a parse error on an empty
 		 * string.
 		 */
@@ -1387,7 +1410,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
 		ret = build_combined_expr_ctx(&metric_list, &combined);
 
 		if (!ret && combined && hashmap__size(combined->ids)) {
-			ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL,
+			ret = parse_ids(metric_no_merge, fake_pmu, combined,
+					/*modifier=*/NULL,
 					/*has_constraint=*/true,
 					&combined_evlist);
 		}
@@ -1435,7 +1459,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
 			}
 		}
 		if (!metric_evlist) {
-			ret = parse_ids(fake_pmu, m->pctx, m->modifier,
+			ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
 					m->has_constraint, &m->evlist);
 			if (ret)
 				goto out;
-- 
2.34.0.rc2.393.gf8c9666880-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-11-30 19:24 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-11-24  1:52 [PATCH v2] perf metric: Reduce multiplexing with duration_time Ian Rogers
2021-11-28 16:23 ` Jiri Olsa
2021-11-29 17:46   ` Ian Rogers
2021-11-30 18:11     ` Jiri Olsa
2021-11-30 19:24       ` Arnaldo Carvalho de Melo

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.