From: Marc Zyngier <maz@kernel.org>
To: James Clark <james.clark@linaro.org>
Cc: kvmarm@lists.linux.dev, oliver.upton@linux.dev,
suzuki.poulose@arm.com, coresight@lists.linaro.org,
Joey Gouly <joey.gouly@arm.com>,
Zenghui Yu <yuzenghui@huawei.com>,
Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will@kernel.org>, Mike Leach <mike.leach@linaro.org>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Mark Brown <broonie@kernel.org>,
Anshuman Khandual <anshuman.khandual@arm.com>,
"Rob Herring (Arm)" <robh@kernel.org>,
Shiqi Liu <shiqiliu@hust.edu.cn>, Fuad Tabba <tabba@google.com>,
James Morse <james.morse@arm.com>,
Raghavendra Rao Ananta <rananta@google.com>,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v8 6/8] KVM: arm64: coresight: Give TRBE enabled state to KVM
Date: Sat, 21 Dec 2024 11:54:31 +0000 [thread overview]
Message-ID: <86seqhp7yg.wl-maz@kernel.org> (raw)
In-Reply-To: <e6b46080-b9ae-4aad-b06d-81407c195e28@linaro.org>
On Fri, 20 Dec 2024 17:32:17 +0000,
James Clark <james.clark@linaro.org> wrote:
>
>
>
> On 20/12/2024 5:05 pm, Marc Zyngier wrote:
> > On Wed, 27 Nov 2024 10:01:23 +0000,
> > James Clark <james.clark@linaro.org> wrote:
> >>
> >> Currently in nVHE, KVM has to check if TRBE is enabled on every guest
> >> switch even if it was never used. Because it's a debug feature and is
> >> more likely to not be used than used, give KVM the TRBE buffer status to
> >> allow a much simpler and faster do-nothing path in the hyp.
> >>
> >> This is always called with preemption disabled except for probe/hotplug
> >> which gets wrapped with preempt_disable().
> >>
> >> Protected mode disables trace regardless of TRBE (because
> >> guest_trfcr_el1 is always 0), which was not previously done. HAS_TRBE
> >> becomes redundant, but HAS_TRF is now required for this.
> >>
> >> Signed-off-by: James Clark <james.clark@linaro.org>
> >> ---
> >> arch/arm64/include/asm/kvm_host.h | 10 +++-
> >> arch/arm64/kvm/debug.c | 25 ++++++++--
> >> arch/arm64/kvm/hyp/nvhe/debug-sr.c | 51 +++++++++++---------
> >> drivers/hwtracing/coresight/coresight-trbe.c | 5 ++
> >> 4 files changed, 65 insertions(+), 26 deletions(-)
> >>
> >> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> >> index 7e3478386351..ba251caa593b 100644
> >> --- a/arch/arm64/include/asm/kvm_host.h
> >> +++ b/arch/arm64/include/asm/kvm_host.h
> >> @@ -611,7 +611,8 @@ struct cpu_sve_state {
> >> */
> >> struct kvm_host_data {
> >> #define KVM_HOST_DATA_FLAG_HAS_SPE 0
> >> -#define KVM_HOST_DATA_FLAG_HAS_TRBE 1
> >> +#define KVM_HOST_DATA_FLAG_HAS_TRF 1
> >> +#define KVM_HOST_DATA_FLAG_TRBE_ENABLED 2
> >> unsigned long flags;
> >> struct kvm_cpu_context host_ctxt;
> >> @@ -657,6 +658,9 @@ struct kvm_host_data {
> >> u64 mdcr_el2;
> >> } host_debug_state;
> >> + /* Guest trace filter value */
> >> + u64 guest_trfcr_el1;
> >
> > Guest value? Or host state while running the guest? If the former,
> > then this has nothing to do here. If the latter, this should be
> > spelled out (trfcr_in_guest?), and the comment amended.
> >
> >> +
> >> /* Number of programmable event counters (PMCR_EL0.N) for this CPU */
> >> unsigned int nr_event_counters;
> >> };
> >> @@ -1381,6 +1385,8 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr)
> >> void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr);
> >> void kvm_clr_pmu_events(u64 clr);
> >> bool kvm_set_pmuserenr(u64 val);
> >> +void kvm_enable_trbe(void);
> >> +void kvm_disable_trbe(void);
> >> #else
> >> static inline void kvm_set_pmu_events(u64 set, struct perf_event_attr *attr) {}
> >> static inline void kvm_clr_pmu_events(u64 clr) {}
> >> @@ -1388,6 +1394,8 @@ static inline bool kvm_set_pmuserenr(u64 val)
> >> {
> >> return false;
> >> }
> >> +static inline void kvm_enable_trbe(void) {}
> >> +static inline void kvm_disable_trbe(void) {}
> >> #endif
> >> void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
> >> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> >> index dd9e139dfd13..0c340ae7b5d1 100644
> >> --- a/arch/arm64/kvm/debug.c
> >> +++ b/arch/arm64/kvm/debug.c
> >> @@ -314,7 +314,26 @@ void kvm_init_host_debug_data(void)
> >> !(read_sysreg_s(SYS_PMBIDR_EL1) & PMBIDR_EL1_P))
> >> host_data_set_flag(HAS_SPE);
> >> - if (cpuid_feature_extract_unsigned_field(dfr0,
> >> ID_AA64DFR0_EL1_TraceBuffer_SHIFT) &&
> >> - !(read_sysreg_s(SYS_TRBIDR_EL1) & TRBIDR_EL1_P))
> >> - host_data_set_flag(HAS_TRBE);
> >> + if (cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_EL1_TraceFilt_SHIFT))
> >> + host_data_set_flag(HAS_TRF);
> >> }
> >> +
> >> +void kvm_enable_trbe(void)
> >> +{
> >> + if (has_vhe() || is_protected_kvm_enabled() ||
> >> + WARN_ON_ONCE(preemptible()))
> >> + return;
> >> +
> >> + host_data_set_flag(TRBE_ENABLED);
> >> +}
> >> +EXPORT_SYMBOL_GPL(kvm_enable_trbe);
> >> +
> >> +void kvm_disable_trbe(void)
> >> +{
> >> + if (has_vhe() || is_protected_kvm_enabled() ||
> >> + WARN_ON_ONCE(preemptible()))
> >> + return;
> >> +
> >> + host_data_clear_flag(TRBE_ENABLED);
> >> +}
> >> +EXPORT_SYMBOL_GPL(kvm_disable_trbe);
> >> diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> >> index 858bb38e273f..9479bee41801 100644
> >> --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> >> +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
> >> @@ -51,32 +51,39 @@ static void __debug_restore_spe(u64 pmscr_el1)
> >> write_sysreg_el1(pmscr_el1, SYS_PMSCR);
> >> }
> >> -static void __debug_save_trace(u64 *trfcr_el1)
> >> +static void __trace_do_switch(u64 *saved_trfcr, u64 new_trfcr)
> >> {
> >> - *trfcr_el1 = 0;
> >> + *saved_trfcr = read_sysreg_el1(SYS_TRFCR);
> >> + write_sysreg_el1(new_trfcr, SYS_TRFCR);
> >> - /* Check if the TRBE is enabled */
> >> - if (!(read_sysreg_s(SYS_TRBLIMITR_EL1) & TRBLIMITR_EL1_E))
> >> + /* No need to drain if going to an enabled state or from disabled state */
> >> + if (new_trfcr || !*saved_trfcr)
> >
> > What if TRFCR_EL1.TS is set to something non-zero? I'd rather you
> > check for the E*TRE bits instead of assuming things.
> >
>
> Yeah it's probably better that way. TS is actually always set when any
> tracing session starts and then never cleared, so doing it the simpler
> way made it always flush even after tracing finished, which probably
> wasn't great.
Quite. Can you please *test* these things?
[...]
> >> @@ -253,8 +256,10 @@ static void trbe_drain_and_disable_local(struct trbe_cpudata *cpudata)
> >> static void trbe_reset_local(struct trbe_cpudata *cpudata)
> >> {
> >> + preempt_disable();
> >> trbe_drain_and_disable_local(cpudata);
> >> write_sysreg_s(0, SYS_TRBLIMITR_EL1);
> >> + preempt_enable();
> >
> > This looks terribly wrong. If you need to disable preemption here, why
> > doesn't the critical section cover all register accesses? Surely you
> > don't want to nuke another CPU's context?
> >
> > But looking at the calling sites, this makes even less sense. The two
> > callers of this thing mess with *per-CPU* interrupts. Dealing with
> > per-CPU interrupts in preemptible context is a big no-no (hint: they
> > start with a call to smp_processor_id()).
> >
> > So what is this supposed to ensure?
> >
> > M.
> >
>
> These ones are only intended to silence the
> WARN_ON_ONCE(preemptible()) in kvm_enable_trbe() when this is called
> from boot/hotplug (arm_trbe_enable_cpu()). Preemption isn't disabled,
> but a guest can't run at that point either.
>
> The "real" calls to kvm_enable_trbe() _are_ called from an atomic
> context. I think there was a previous review comment about when it was
> safe to call the KVM parts of this change, which is why I added the
> warning making sure it was always called with preemption disabled. But
> actually I could remove the warning and these preempt_disables() and
> replace them with a comment.
You should keep the WARN_ON(), and either *never* end-up calling this
stuff during a CPUHP event, or handle the fact that preemption isn't
initialised yet. For example by checking whether the current CPU is
online.
But this sort of random spreading of preemption disabling is not an
acceptable outcome.
M.
--
Without deviation from the norm, progress is not possible.
next prev parent reply other threads:[~2024-12-21 11:54 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-27 10:01 [PATCH v8 0/8] kvm/coresight: Support exclude guest and exclude host James Clark
2024-11-27 10:01 ` [PATCH v8 1/8] KVM: arm64: Get rid of __kvm_get_mdcr_el2() and related warts James Clark
2024-11-27 10:01 ` [PATCH v8 2/8] KVM: arm64: Track presence of SPE/TRBE in kvm_host_data instead of vCPU James Clark
2024-11-27 10:01 ` [PATCH v8 3/8] arm64/sysreg: Add a comment that the sysreg file should be sorted James Clark
2024-11-27 10:01 ` [PATCH v8 4/8] tools: arm64: Update sysreg.h header files James Clark
2024-11-27 10:01 ` [PATCH v8 5/8] arm64/sysreg/tools: Move TRFCR definitions to sysreg James Clark
2024-11-27 10:01 ` [PATCH v8 6/8] KVM: arm64: coresight: Give TRBE enabled state to KVM James Clark
2024-12-20 17:05 ` Marc Zyngier
2024-12-20 17:32 ` James Clark
2024-12-21 11:54 ` Marc Zyngier [this message]
2024-12-23 11:36 ` James Clark
2024-11-27 10:01 ` [PATCH v8 7/8] KVM: arm64: Support trace filtering for guests James Clark
2024-12-21 12:34 ` Marc Zyngier
2024-12-23 11:28 ` James Clark
2024-12-26 14:22 ` Marc Zyngier
2024-11-27 10:01 ` [PATCH v8 8/8] coresight: Pass guest TRFCR value to KVM James Clark
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=86seqhp7yg.wl-maz@kernel.org \
--to=maz@kernel.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=anshuman.khandual@arm.com \
--cc=broonie@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=coresight@lists.linaro.org \
--cc=james.clark@linaro.org \
--cc=james.morse@arm.com \
--cc=joey.gouly@arm.com \
--cc=kvmarm@lists.linux.dev \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mike.leach@linaro.org \
--cc=oliver.upton@linux.dev \
--cc=rananta@google.com \
--cc=robh@kernel.org \
--cc=shiqiliu@hust.edu.cn \
--cc=suzuki.poulose@arm.com \
--cc=tabba@google.com \
--cc=will@kernel.org \
--cc=yuzenghui@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.