From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
tglx@linutronix.de, bp@alien8.de, x86@kernel.org,
linux-kernel@vger.kernel.org
Cc: mark.rutland@arm.com, alexander.shishkin@linux.intel.com,
jolsa@redhat.com, namhyung@kernel.org, dave.hansen@intel.com,
yu-cheng.yu@intel.com, bigeasy@linutronix.de, gorcunov@gmail.com,
hpa@zytor.com, alexey.budankov@linux.intel.com,
eranian@google.com, ak@linux.intel.com, like.xu@linux.intel.com,
yao.jin@linux.intel.com, wei.w.wang@intel.com,
Kan Liang <kan.liang@linux.intel.com>
Subject: [PATCH V3 13/23] perf/x86/intel/lbr: Factor out intel_pmu_store_lbr
Date: Fri, 3 Jul 2020 05:49:19 -0700 [thread overview]
Message-ID: <1593780569-62993-14-git-send-email-kan.liang@linux.intel.com> (raw)
In-Reply-To: <1593780569-62993-1-git-send-email-kan.liang@linux.intel.com>
From: Kan Liang <kan.liang@linux.intel.com>
The way to store the LBR information from a PEBS LBR record can be
reused in Architecture LBR, because
- The LBR information is stored like a stack. Entry 0 is always the
youngest branch.
- The layout of the LBR INFO MSR is similar.
The LBR information may be retrieved from either the LBR registers
(non-PEBS event) or a buffer (PEBS event). Extend rdlbr_*() to support
both methods.
Explicitly check the invalid entry (0s), which can avoid unnecessary MSR
access if using a non-PEBS event. For a PEBS event, the check should
slightly improve the performance as well. The invalid entries are cut.
The intel_pmu_lbr_filter() doesn't need to check and filter them out.
Cannot share the function with current model-specific LBR read, because
the direction of the LBR growth is opposite.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
arch/x86/events/intel/lbr.c | 81 +++++++++++++++++++++++++++++++--------------
1 file changed, 56 insertions(+), 25 deletions(-)
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index f47f41e..7186751 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -348,28 +348,37 @@ static __always_inline void wrlbr_info(unsigned int idx, u64 val)
wrmsrl(x86_pmu.lbr_info + idx, val);
}
-static __always_inline u64 rdlbr_from(unsigned int idx)
+static __always_inline u64 rdlbr_from(unsigned int idx, struct lbr_entry *lbr)
{
u64 val;
+ if (lbr)
+ return lbr->from;
+
rdmsrl(x86_pmu.lbr_from + idx, val);
return lbr_from_signext_quirk_rd(val);
}
-static __always_inline u64 rdlbr_to(unsigned int idx)
+static __always_inline u64 rdlbr_to(unsigned int idx, struct lbr_entry *lbr)
{
u64 val;
+ if (lbr)
+ return lbr->to;
+
rdmsrl(x86_pmu.lbr_to + idx, val);
return val;
}
-static __always_inline u64 rdlbr_info(unsigned int idx)
+static __always_inline u64 rdlbr_info(unsigned int idx, struct lbr_entry *lbr)
{
u64 val;
+ if (lbr)
+ return lbr->info;
+
rdmsrl(x86_pmu.lbr_info + idx, val);
return val;
@@ -387,16 +396,16 @@ wrlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
static __always_inline bool
rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
{
- u64 from = rdlbr_from(idx);
+ u64 from = rdlbr_from(idx, NULL);
/* Don't read invalid entry */
if (!from)
return false;
lbr->from = from;
- lbr->to = rdlbr_to(idx);
+ lbr->to = rdlbr_to(idx, NULL);
if (need_info)
- lbr->info = rdlbr_info(idx);
+ lbr->info = rdlbr_info(idx, NULL);
return true;
}
@@ -432,7 +441,7 @@ void intel_pmu_lbr_restore(void *ctx)
static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
{
- return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos);
+ return !rdlbr_from(((struct x86_perf_task_context *)ctx)->tos, NULL);
}
static void __intel_pmu_lbr_restore(void *ctx)
@@ -709,8 +718,8 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
u16 cycles = 0;
int lbr_flags = lbr_desc[lbr_format];
- from = rdlbr_from(lbr_idx);
- to = rdlbr_to(lbr_idx);
+ from = rdlbr_from(lbr_idx, NULL);
+ to = rdlbr_to(lbr_idx, NULL);
/*
* Read LBR call stack entries
@@ -722,7 +731,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (lbr_format == LBR_FORMAT_INFO && need_info) {
u64 info;
- info = rdlbr_info(lbr_idx);
+ info = rdlbr_info(lbr_idx, NULL);
mis = !!(info & LBR_INFO_MISPRED);
pred = !mis;
in_tx = !!(info & LBR_INFO_IN_TX);
@@ -777,6 +786,42 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_stack.hw_idx = tos;
}
+static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
+ struct lbr_entry *entries)
+{
+ struct perf_branch_entry *e;
+ struct lbr_entry *lbr;
+ u64 from, to, info;
+ int i;
+
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ lbr = entries ? &entries[i] : NULL;
+ e = &cpuc->lbr_entries[i];
+
+ from = rdlbr_from(i, lbr);
+ /*
+ * Read LBR entries until invalid entry (0s) is detected.
+ */
+ if (!from)
+ break;
+
+ to = rdlbr_to(i, lbr);
+ info = rdlbr_info(i, lbr);
+
+ e->from = from;
+ e->to = to;
+ e->mispred = !!(info & LBR_INFO_MISPRED);
+ e->predicted = !(info & LBR_INFO_MISPRED);
+ e->in_tx = !!(info & LBR_INFO_IN_TX);
+ e->abort = !!(info & LBR_INFO_ABORT);
+ e->cycles = info & LBR_INFO_CYCLES;
+ e->type = 0;
+ e->reserved = 0;
+ }
+
+ cpuc->lbr_stack.nr = i;
+}
+
void intel_pmu_lbr_read(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1215,9 +1260,6 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int i;
-
- cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
/* Cannot get TOS for large PEBS */
if (cpuc->n_pebs == cpuc->n_large_pebs)
@@ -1225,19 +1267,8 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
else
cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
- for (i = 0; i < x86_pmu.lbr_nr; i++) {
- u64 info = lbr->lbr[i].info;
- struct perf_branch_entry *e = &cpuc->lbr_entries[i];
+ intel_pmu_store_lbr(cpuc, lbr->lbr);
- e->from = lbr->lbr[i].from;
- e->to = lbr->lbr[i].to;
- e->mispred = !!(info & LBR_INFO_MISPRED);
- e->predicted = !(info & LBR_INFO_MISPRED);
- e->in_tx = !!(info & LBR_INFO_IN_TX);
- e->abort = !!(info & LBR_INFO_ABORT);
- e->cycles = info & LBR_INFO_CYCLES;
- e->reserved = 0;
- }
intel_pmu_lbr_filter(cpuc);
}
--
2.7.4
next prev parent reply other threads:[~2020-07-03 12:53 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-03 12:49 [PATCH V3 00/23] Support Architectural LBR kan.liang
2020-07-03 12:49 ` [PATCH V3 01/23] x86/cpufeatures: Add Architectural LBRs feature bit kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-09 23:00 ` Dave Hansen
2020-07-10 9:51 ` Peter Zijlstra
2020-07-10 14:09 ` Liang, Kan
2020-07-03 12:49 ` [PATCH V3 02/23] perf/x86/intel/lbr: Add a function pointer for LBR reset kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 03/23] perf/x86/intel/lbr: Add a function pointer for LBR read kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 04/23] perf/x86/intel/lbr: Add the function pointers for LBR save and restore kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 05/23] perf/x86/intel/lbr: Factor out a new struct for generic optimization kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 06/23] perf/x86/intel/lbr: Use dynamic data structure for task_ctx kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 07/23] x86/msr-index: Add bunch of MSRs for Arch LBR kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 08/23] perf/x86: Expose CPUID enumeration bits for arch LBR kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 09/23] perf/x86/intel/lbr: Support LBR_CTL kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 10/23] perf/x86/intel/lbr: Unify the stored format of LBR information kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 11/23] perf/x86/intel/lbr: Mark the {rd,wr}lbr_{to,from} wrappers __always_inline kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 12/23] perf/x86/intel/lbr: Factor out rdlbr_all() and wrlbr_all() kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` kan.liang [this message]
2020-07-03 19:50 ` [PATCH V3 13/23] perf/x86/intel/lbr: Factor out intel_pmu_store_lbr Peter Zijlstra
2020-07-03 20:59 ` Liang, Kan
2020-07-06 10:25 ` Peter Zijlstra
2020-07-06 13:32 ` Liang, Kan
2020-07-06 14:25 ` Peter Zijlstra
2020-07-06 22:29 ` Liang, Kan
2020-07-07 7:40 ` Peter Zijlstra
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 14/23] perf/x86/intel/lbr: Support Architectural LBR kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 15/23] perf/core: Factor out functions to allocate/free the task_ctx_data kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 16/23] perf/core: Use kmem_cache to allocate the PMU specific data kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 17/23] perf/x86/intel/lbr: Create kmem_cache for the LBR context data kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 18/23] perf/x86: Remove task_ctx_size kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 19/23] x86/fpu: Use proper mask to replace full instruction mask kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 20/23] x86/fpu/xstate: Support dynamic supervisor feature for LBR kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2021-05-27 22:15 ` Thomas Gleixner
2020-07-03 12:49 ` [PATCH V3 21/23] x86/fpu/xstate: Add helpers for LBR dynamic supervisor feature kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 22/23] perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 12:49 ` [PATCH V3 23/23] perf/x86/intel/lbr: Support XSAVES for arch LBR read kan.liang
2020-07-08 9:51 ` [tip: perf/core] " tip-bot2 for Kan Liang
2020-07-03 19:34 ` [PATCH V3 00/23] Support Architectural LBR Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1593780569-62993-14-git-send-email-kan.liang@linux.intel.com \
--to=kan.liang@linux.intel.com \
--cc=acme@kernel.org \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=alexey.budankov@linux.intel.com \
--cc=bigeasy@linutronix.de \
--cc=bp@alien8.de \
--cc=dave.hansen@intel.com \
--cc=eranian@google.com \
--cc=gorcunov@gmail.com \
--cc=hpa@zytor.com \
--cc=jolsa@redhat.com \
--cc=like.xu@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=tglx@linutronix.de \
--cc=wei.w.wang@intel.com \
--cc=x86@kernel.org \
--cc=yao.jin@linux.intel.com \
--cc=yu-cheng.yu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox