From: kan.liang@linux.intel.com
To: peterz@infradead.org, acme@redhat.com, mingo@kernel.org,
linux-kernel@vger.kernel.org
Cc: tglx@linutronix.de, jolsa@kernel.org, eranian@google.com,
alexander.shishkin@linux.intel.com, ak@linux.intel.com,
Kan Liang <kan.liang@linux.intel.com>
Subject: [PATCH V5 RESEND 07/14] perf/x86/intel: Support per thread RDPMC TopDown metrics
Date: Mon, 6 Jan 2020 12:29:12 -0800 [thread overview]
Message-ID: <20200106202919.2943-8-kan.liang@linux.intel.com> (raw)
In-Reply-To: <20200106202919.2943-1-kan.liang@linux.intel.com>
From: Kan Liang <kan.liang@linux.intel.com>
With Ice Lake CPUs, the TopDown metrics are directly available as fixed
counters and do not require generic counters, which make it possible to
measure TopDown per thread/process instead of only per core.
The metrics and slots values have to be saved/restored during context
switching.
The saved values are also used as previous values to calculate the
delta.
The PERF_METRICS MSR value will be returned if RDPMC metrics events.
Re-use last_period and period_left, which are unused sampling fields,
for saved_metric and saved_slots.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
Changes since V4:
- Re-use last_period and period_left for saved_metric and saved_slots.
arch/x86/events/core.c | 5 +-
arch/x86/events/intel/core.c | 103 +++++++++++++++++++++++++++++------
include/linux/perf_event.h | 29 ++++++----
3 files changed, 108 insertions(+), 29 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index bfa5e8286eed..333541c05815 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2204,7 +2204,10 @@ static int x86_pmu_event_idx(struct perf_event *event)
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
return 0;
- if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
+ /* Return PERF_METRICS MSR value for metrics event */
+ if (is_metric_idx(idx))
+ idx = 1 << 29;
+ else if (x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) {
idx -= INTEL_PMC_IDX_FIXED;
idx |= 1 << 30;
}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index d7aecfe03372..0d1a327c18fc 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2244,7 +2244,13 @@ static int icl_set_topdown_event_period(struct perf_event *event)
if (left == x86_pmu.max_period) {
wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
wrmsrl(MSR_PERF_METRICS, 0);
- local64_set(&hwc->period_left, 0);
+ hwc->saved_slots = 0;
+ hwc->saved_metric = 0;
+ }
+
+ if ((hwc->saved_slots) && is_slots_event(event)) {
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR3, hwc->saved_slots);
+ wrmsrl(MSR_PERF_METRICS, hwc->saved_metric);
}
perf_event_update_userpage(event);
@@ -2265,7 +2271,7 @@ static u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx)
return mul_u64_u32_div(slots, val, 0xff);
}
-static void __icl_update_topdown_event(struct perf_event *event,
+static u64 icl_get_topdown_value(struct perf_event *event,
u64 slots, u64 metrics)
{
int idx = event->hw.idx;
@@ -2276,7 +2282,50 @@ static void __icl_update_topdown_event(struct perf_event *event,
else
delta = slots;
- local64_add(delta, &event->count);
+ return delta;
+}
+
+static void __icl_update_topdown_event(struct perf_event *event,
+ u64 slots, u64 metrics,
+ u64 last_slots, u64 last_metrics)
+{
+ u64 delta, last = 0;
+
+ delta = icl_get_topdown_value(event, slots, metrics);
+ if (last_slots)
+ last = icl_get_topdown_value(event, last_slots, last_metrics);
+
+ /*
+ * The 8bit integer fraction of metric may be not accurate,
+ * especially when the changes is very small.
+ * For example, if only a few bad_spec happens, the fraction
+ * may be reduced from 1 to 0. If so, the bad_spec event value
+ * will be 0 which is definitely less than the last value.
+ * Avoid update event->count for this case.
+ */
+ if (delta > last) {
+ delta -= last;
+ local64_add(delta, &event->count);
+ }
+}
+
+static void update_saved_topdown_regs(struct perf_event *event,
+ u64 slots, u64 metrics)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct perf_event *other;
+ int idx;
+
+ event->hw.saved_slots = slots;
+ event->hw.saved_metric = metrics;
+
+ for_each_set_bit(idx, cpuc->active_mask, INTEL_PMC_IDX_TD_BE_BOUND + 1) {
+ if (!is_topdown_idx(idx))
+ continue;
+ other = cpuc->events[idx];
+ other->hw.saved_slots = slots;
+ other->hw.saved_metric = metrics;
+ }
}
/*
@@ -2290,6 +2339,7 @@ static u64 icl_update_topdown_event(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *other;
u64 slots, metrics;
+ bool reset = true;
int idx;
/* read Fixed counter 3 */
@@ -2304,25 +2354,45 @@ static u64 icl_update_topdown_event(struct perf_event *event)
if (!is_topdown_idx(idx))
continue;
other = cpuc->events[idx];
- __icl_update_topdown_event(other, slots, metrics);
+ __icl_update_topdown_event(other, slots, metrics,
+ event ? event->hw.saved_slots : 0,
+ event ? event->hw.saved_metric : 0);
}
/*
* Check and update this event, which may have been cleared
* in active_mask e.g. x86_pmu_stop()
*/
- if (event && !test_bit(event->hw.idx, cpuc->active_mask))
- __icl_update_topdown_event(event, slots, metrics);
+ if (event && !test_bit(event->hw.idx, cpuc->active_mask)) {
+ __icl_update_topdown_event(event, slots, metrics,
+ event->hw.saved_slots,
+ event->hw.saved_metric);
- /*
- * Software is recommended to periodically clear both registers
- * in order to maintain accurate measurements, which is required for
- * certain scenarios that involve sampling metrics at high rates.
- * Software should always write fixed counter 3 before write to
- * PERF_METRICS.
- */
- wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
- wrmsrl(MSR_PERF_METRICS, 0);
+ /*
+ * In x86_pmu_stop(), the event is cleared in active_mask first,
+ * then drain the delta, which indicates context switch for
+ * counting.
+ * Save metric and slots for context switch.
+ * Don't need to reset the PERF_METRICS and Fixed counter 3.
+ * Because the values will be restored in next schedule in.
+ */
+ update_saved_topdown_regs(event, slots, metrics);
+ reset = false;
+ }
+
+ if (reset) {
+ /*
+ * Software is recommended to periodically clear both registers
+ * in order to maintain accurate measurements, which is required
+ * for certain scenarios that involve sampling metrics at high
+ * rates. Software should always write fixed counter 3 before
+ * write to PERF_METRICS.
+ */
+ wrmsrl(MSR_CORE_PERF_FIXED_CTR3, 0);
+ wrmsrl(MSR_PERF_METRICS, 0);
+ if (event)
+ update_saved_topdown_regs(event, 0, 0);
+ }
return slots;
}
@@ -3515,9 +3585,6 @@ static int intel_pmu_hw_config(struct perf_event *event)
event->attr.config1 = event->hw.config &
X86_ALL_EVENT_FLAGS;
event->hw.flags |= PERF_X86_EVENT_TOPDOWN;
-
- if (is_metric_event(event))
- event->hw.flags &= ~PERF_X86_EVENT_RDPMC_ALLOWED;
}
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 011dcbdbccc2..3f58414e4a91 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -200,17 +200,26 @@ struct hw_perf_event {
*/
u64 sample_period;
- /*
- * The period we started this sample with.
- */
- u64 last_period;
+ union {
+ struct { /* Sampling */
+ /*
+ * The period we started this sample with.
+ */
+ u64 last_period;
- /*
- * However much is left of the current period; note that this is
- * a full 64bit value and allows for generation of periods longer
- * than hardware might allow.
- */
- local64_t period_left;
+ /*
+ * However much is left of the current period;
+ * note that this is a full 64bit value and
+ * allows for generation of periods longer
+ * than hardware might allow.
+ */
+ local64_t period_left;
+ };
+ struct { /* Topdown events counting for context switch */
+ u64 saved_metric;
+ u64 saved_slots;
+ };
+ };
/*
* State for throttling the event, see __perf_event_overflow() and
--
2.17.1
next prev parent reply other threads:[~2020-01-06 20:30 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-01-06 20:29 [PATCH V5 RESEND 00/14] TopDown metrics support for Icelake kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 01/14] perf/x86/intel: Introduce the fourth fixed counter kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 02/14] perf/x86/intel: Set correct mask for TOPDOWN.SLOTS kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 03/14] perf/x86/intel: Move BTS index to 47 kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 04/14] perf/x86/intel: Basic support for metrics counters kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 05/14] perf/x86/intel: Fix the name of perf capabilities for perf METRICS kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 06/14] perf/x86/intel: Support hardware TopDown metrics kan.liang
2020-01-06 20:29 ` kan.liang [this message]
2020-01-06 20:29 ` [PATCH V5 RESEND 08/14] perf/x86/intel: Export TopDown events for Icelake kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 09/14] perf/x86/intel: Disable sampling read slots and topdown kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 10/14] perf/x86/intel: Name global status bit in NMI handler kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 11/14] perf/x86: Use event_base_rdpmc for RDPMC userspace support kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 12/14] perf, tools, stat: Support new per thread TopDown metrics kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 13/14] perf, tools, stat: Check Topdown Metric group kan.liang
2020-01-06 20:29 ` [PATCH V5 RESEND 14/14] perf, tools: Add documentation for topdown metrics kan.liang
2020-01-10 13:17 ` [PATCH V5 RESEND 00/14] TopDown metrics support for Icelake Peter Zijlstra
2020-04-20 16:00 ` Stephane Eranian
2020-04-20 17:02 ` Peter Zijlstra
-- strict thread matches above, loose matches on Subject: below --
2019-12-03 14:11 kan.liang
2019-12-03 14:12 ` [PATCH V5 RESEND 07/14] perf/x86/intel: Support per thread RDPMC TopDown metrics kan.liang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200106202919.2943-8-kan.liang@linux.intel.com \
--to=kan.liang@linux.intel.com \
--cc=acme@redhat.com \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=eranian@google.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.