From: cdall@linaro.org (Christoffer Dall)
To: linux-arm-kernel@lists.infradead.org
Subject: [PATCH v3 03/20] arm64: Use the physical counter when available for read_cycles
Date: Wed, 18 Oct 2017 13:34:05 +0200 [thread overview]
Message-ID: <20171018113405.GA8900@cbox> (raw)
In-Reply-To: <9b06425f-7c2c-d44a-cd6c-aeaa4b76849c@arm.com>
On Mon, Oct 09, 2017 at 05:21:24PM +0100, Marc Zyngier wrote:
> On 23/09/17 01:41, Christoffer Dall wrote:
> > Currently get_cycles() is hardwired to arch_counter_get_cntvct() on
> > arm64, but as we move to using the physical timer for the in-kernel
> > time-keeping, we need to make that more flexible.
> >
> > First, we need to make sure the physical counter can be read on equal
> > terms to the virtual counter, which includes adding physical counter
> > read functions for timers that require errata.
> >
> > > Second, we need to choose between reading the physical and the virtual
> > > counter, depending on which timer the rest of the kernel uses for
> > > time-keeping. We can do this using a static key to avoid a performance
> > > penalty during runtime when reading the counter.
> >
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Will Deacon <will.deacon@arm.com>
> > Cc: Mark Rutland <mark.rutland@arm.com>
> > Cc: Marc Zyngier <marc.zyngier@arm.com>
> > Signed-off-by: Christoffer Dall <cdall@linaro.org>
>
> Right. I should have read patch #3. I'm an idiot.
>
> > ---
> > arch/arm64/include/asm/arch_timer.h | 15 ++++++++++++---
> > arch/arm64/include/asm/timex.h | 2 +-
> > drivers/clocksource/arm_arch_timer.c | 32 ++++++++++++++++++++++++++++++--
> > 3 files changed, 43 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
> > index 1859a1c..c56d8cd 100644
> > --- a/arch/arm64/include/asm/arch_timer.h
> > +++ b/arch/arm64/include/asm/arch_timer.h
> > @@ -30,6 +30,8 @@
> >
> > #include <clocksource/arm_arch_timer.h>
> >
> > +extern struct static_key_false arch_timer_phys_counter_available;
> > +
> > #if IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND)
> > extern struct static_key_false arch_timer_read_ool_enabled;
> > #define needs_unstable_timer_counter_workaround() \
> > @@ -52,6 +54,7 @@ struct arch_timer_erratum_workaround {
> > const char *desc;
> > u32 (*read_cntp_tval_el0)(void);
> > u32 (*read_cntv_tval_el0)(void);
> > + u64 (*read_cntpct_el0)(void);
> > u64 (*read_cntvct_el0)(void);
> > int (*set_next_event_phys)(unsigned long, struct clock_event_device *);
> > int (*set_next_event_virt)(unsigned long, struct clock_event_device *);
> > @@ -148,10 +151,8 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl)
> >
> > static inline u64 arch_counter_get_cntpct(void)
> > {
> > - u64 cval;
> > isb();
> > - asm volatile("mrs %0, cntpct_el0" : "=r" (cval));
> > - return cval;
> > + return arch_timer_reg_read_stable(cntpct_el0);
> > }
> >
> > static inline u64 arch_counter_get_cntvct(void)
> > @@ -160,6 +161,14 @@ static inline u64 arch_counter_get_cntvct(void)
> > return arch_timer_reg_read_stable(cntvct_el0);
> > }
> >
> > +static inline u64 arch_counter_get_cycles(void)
> > +{
> > + if (static_branch_unlikely(&arch_timer_phys_counter_available))
> > + return arch_counter_get_cntpct();
> > + else
> > + return arch_counter_get_cntvct();
> > +}
> > +
> > static inline int arch_timer_arch_init(void)
> > {
> > return 0;
> > diff --git a/arch/arm64/include/asm/timex.h b/arch/arm64/include/asm/timex.h
> > index 81a076e..c0d214c 100644
> > --- a/arch/arm64/include/asm/timex.h
> > +++ b/arch/arm64/include/asm/timex.h
> > @@ -22,7 +22,7 @@
> > * Use the current timer as a cycle counter since this is what we use for
> > * the delay loop.
> > */
> > -#define get_cycles() arch_counter_get_cntvct()
> > +#define get_cycles() arch_counter_get_cycles()
>
> Why can't this be arch_timer_read_counter() instead? Is there any
> measurable advantage in using a static key compared to a memory
> indirection?
>
No reason. I believe I assumed there was an include dependency issue, which
led me to do it the other way, but I must have confused myself, because
using arch_timer_read_counter() seems to work perfectly well.
> >
> > #include <asm-generic/timex.h>
> >
> > diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> > index 9b3322a..f35da20 100644
> > --- a/drivers/clocksource/arm_arch_timer.c
> > +++ b/drivers/clocksource/arm_arch_timer.c
> > @@ -77,6 +77,9 @@ static bool arch_timer_mem_use_virtual;
> > static bool arch_counter_suspend_stop;
> > static bool vdso_default = true;
> >
> > +DEFINE_STATIC_KEY_FALSE(arch_timer_phys_counter_available);
> > +EXPORT_SYMBOL_GPL(arch_timer_phys_counter_available);
> > +
> > static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
> >
> > static int __init early_evtstrm_cfg(char *buf)
> > @@ -217,6 +220,11 @@ static u32 notrace fsl_a008585_read_cntv_tval_el0(void)
> > return __fsl_a008585_read_reg(cntv_tval_el0);
> > }
> >
> > +static u64 notrace fsl_a008585_read_cntpct_el0(void)
> > +{
> > + return __fsl_a008585_read_reg(cntpct_el0);
> > +}
> > +
> > static u64 notrace fsl_a008585_read_cntvct_el0(void)
> > {
> > return __fsl_a008585_read_reg(cntvct_el0);
> > @@ -258,6 +266,11 @@ static u32 notrace hisi_161010101_read_cntv_tval_el0(void)
> > return __hisi_161010101_read_reg(cntv_tval_el0);
> > }
> >
> > +static u64 notrace hisi_161010101_read_cntpct_el0(void)
> > +{
> > + return __hisi_161010101_read_reg(cntpct_el0);
> > +}
> > +
> > static u64 notrace hisi_161010101_read_cntvct_el0(void)
> > {
> > return __hisi_161010101_read_reg(cntvct_el0);
> > @@ -288,6 +301,15 @@ static struct ate_acpi_oem_info hisi_161010101_oem_info[] = {
> > #endif
> >
> > #ifdef CONFIG_ARM64_ERRATUM_858921
> > +static u64 notrace arm64_858921_read_cntpct_el0(void)
> > +{
> > + u64 old, new;
> > +
> > + old = read_sysreg(cntpct_el0);
> > + new = read_sysreg(cntpct_el0);
> > + return (((old ^ new) >> 32) & 1) ? old : new;
> > +}
> > +
> > static u64 notrace arm64_858921_read_cntvct_el0(void)
> > {
> > u64 old, new;
> > @@ -346,6 +368,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
> > .desc = "Freescale erratum a005858",
> > .read_cntp_tval_el0 = fsl_a008585_read_cntp_tval_el0,
> > .read_cntv_tval_el0 = fsl_a008585_read_cntv_tval_el0,
> > + .read_cntpct_el0 = fsl_a008585_read_cntpct_el0,
> > .read_cntvct_el0 = fsl_a008585_read_cntvct_el0,
> > .set_next_event_phys = erratum_set_next_event_tval_phys,
> > .set_next_event_virt = erratum_set_next_event_tval_virt,
> > @@ -358,6 +381,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
> > .desc = "HiSilicon erratum 161010101",
> > .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0,
> > .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0,
> > + .read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
> > .read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
> > .set_next_event_phys = erratum_set_next_event_tval_phys,
> > .set_next_event_virt = erratum_set_next_event_tval_virt,
> > @@ -368,6 +392,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
> > .desc = "HiSilicon erratum 161010101",
> > .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0,
> > .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0,
> > + .read_cntpct_el0 = hisi_161010101_read_cntpct_el0,
> > .read_cntvct_el0 = hisi_161010101_read_cntvct_el0,
> > .set_next_event_phys = erratum_set_next_event_tval_phys,
> > .set_next_event_virt = erratum_set_next_event_tval_virt,
> > @@ -378,6 +403,7 @@ static const struct arch_timer_erratum_workaround ool_workarounds[] = {
> > .match_type = ate_match_local_cap_id,
> > .id = (void *)ARM64_WORKAROUND_858921,
> > .desc = "ARM erratum 858921",
> > + .read_cntpct_el0 = arm64_858921_read_cntpct_el0,
> > .read_cntvct_el0 = arm64_858921_read_cntvct_el0,
> > },
> > #endif
> > @@ -890,10 +916,12 @@ static void __init arch_counter_register(unsigned type)
> >
> > /* Register the CP15 based counter if we have one */
> > if (type & ARCH_TIMER_TYPE_CP15) {
> > - if (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI)
> > + if (arch_timer_uses_ppi == ARCH_TIMER_VIRT_PPI) {
> > arch_timer_read_counter = arch_counter_get_cntvct;
> > - else
> > + } else {
> > arch_timer_read_counter = arch_counter_get_cntpct;
> > + static_branch_enable(&arch_timer_phys_counter_available);
> > + }
> >
> > clocksource_counter.archdata.vdso_direct = vdso_default;
> > } else {
> >
>
> In my reply to patch #2, I had the following hunk:
>
> @@ -310,7 +329,7 @@ static void erratum_set_next_event_tval_generic(const int access, unsigned long
> struct clock_event_device *clk)
> {
> unsigned long ctrl;
> - u64 cval = evt + arch_counter_get_cntvct();
> + u64 cval = evt + arch_timer_read_counter();
>
> ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
> ctrl |= ARCH_TIMER_CTRL_ENABLE;
>
> Once we start using a different timer, this could well have an effect...
>
Right, but wouldn't the following be a more correct way to go about it, then?
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 9a7b359..07f19db 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -329,16 +329,19 @@ static void erratum_set_next_event_tval_generic(const int access, unsigned long
struct clock_event_device *clk)
{
unsigned long ctrl;
- u64 cval = evt + arch_timer_read_counter();
+ u64 cval;
ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
ctrl |= ARCH_TIMER_CTRL_ENABLE;
ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
- if (access == ARCH_TIMER_PHYS_ACCESS)
+ if (access == ARCH_TIMER_PHYS_ACCESS) {
+ cval = evt + arch_counter_get_cntpct();
write_sysreg(cval, cntp_cval_el0);
- else
+ } else {
+ cval = evt + arch_counter_get_cntvct();
write_sysreg(cval, cntv_cval_el0);
+ }
arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
}
Thanks,
-Christoffer
next prev parent reply other threads:[~2017-10-18 11:34 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-09-23 0:41 [PATCH v3 00/20] KVM: arm/arm64: Optimize arch timer register handling Christoffer Dall
2017-09-23 0:41 ` [PATCH v3 01/20] irqchip/gic: Deal with broken firmware exposing only 4kB of GICv2 CPU interface Christoffer Dall
2017-09-23 0:41 ` [PATCH v3 02/20] arm64: Use physical counter for in-kernel reads Christoffer Dall
2017-10-09 16:10 ` Marc Zyngier
2017-10-17 15:33 ` Will Deacon
2017-10-18 10:00 ` Christoffer Dall
2017-09-23 0:41 ` [PATCH v3 03/20] arm64: Use the physical counter when available for read_cycles Christoffer Dall
2017-10-09 16:21 ` Marc Zyngier
2017-10-18 11:34 ` Christoffer Dall [this message]
2017-10-18 15:52 ` Marc Zyngier
2017-09-23 0:41 ` [PATCH v3 04/20] KVM: arm/arm64: Guard kvm_vgic_map_is_active against !vgic_initialized Christoffer Dall
2017-10-09 16:22 ` Marc Zyngier
2017-09-23 0:41 ` [PATCH v3 05/20] KVM: arm/arm64: Support calling vgic_update_irq_pending from irq context Christoffer Dall
2017-10-09 16:37 ` Marc Zyngier
2017-10-18 11:54 ` Christoffer Dall
2017-09-23 0:41 ` [PATCH v3 06/20] KVM: arm/arm64: Check that system supports split eoi/deactivate Christoffer Dall
2017-10-09 16:47 ` Marc Zyngier
2017-10-18 13:41 ` Christoffer Dall
2017-10-18 16:03 ` Marc Zyngier
2017-10-18 19:16 ` Christoffer Dall
2017-09-23 0:41 ` [PATCH v3 07/20] KVM: arm/arm64: Make timer_arm and timer_disarm helpers more generic Christoffer Dall
2017-10-09 17:05 ` Marc Zyngier
2017-10-18 16:47 ` Christoffer Dall
2017-10-18 16:53 ` Marc Zyngier
2017-09-23 0:41 ` [PATCH v3 08/20] KVM: arm/arm64: Rename soft timer to bg_timer Christoffer Dall
2017-10-09 17:06 ` Marc Zyngier
2017-09-23 0:41 ` [PATCH v3 09/20] KVM: arm/arm64: Use separate timer for phys timer emulation Christoffer Dall
2017-10-09 17:23 ` Marc Zyngier
2017-10-19 7:38 ` Christoffer Dall
2017-09-23 0:41 ` [PATCH v3 10/20] KVM: arm/arm64: Move timer/vgic flush/sync under disabled irq Christoffer Dall
2017-10-09 17:34 ` Marc Zyngier
2017-09-23 0:41 ` [PATCH v3 11/20] KVM: arm/arm64: Move timer save/restore out of the hyp code Christoffer Dall
2017-10-09 17:47 ` Marc Zyngier
2017-10-19 7:46 ` Christoffer Dall
2017-09-23 0:41 ` [PATCH v3 12/20] genirq: Document vcpu_info usage for percpu_devid interrupts Christoffer Dall
2017-10-09 17:48 ` Marc Zyngier
2017-09-23 0:42 ` [PATCH v3 13/20] KVM: arm/arm64: Set VCPU affinity for virt timer irq Christoffer Dall
2017-10-09 17:52 ` Marc Zyngier
2017-09-23 0:42 ` [PATCH v3 14/20] KVM: arm/arm64: Avoid timer save/restore in vcpu entry/exit Christoffer Dall
2017-10-10 8:47 ` Marc Zyngier
2017-10-19 8:15 ` Christoffer Dall
2017-09-23 0:42 ` [PATCH v3 15/20] KVM: arm/arm64: Support EL1 phys timer register access in set/get reg Christoffer Dall
2017-10-10 9:10 ` Marc Zyngier
2017-10-19 8:32 ` Christoffer Dall
2017-09-23 0:42 ` [PATCH v3 16/20] KVM: arm/arm64: Use kvm_arm_timer_set/get_reg for guest register traps Christoffer Dall
2017-10-10 9:12 ` Marc Zyngier
2017-09-23 0:42 ` [PATCH v3 17/20] KVM: arm/arm64: Move phys_timer_emulate function Christoffer Dall
2017-10-10 9:21 ` Marc Zyngier
2017-09-23 0:42 ` [PATCH v3 18/20] KVM: arm/arm64: Avoid phys timer emulation in vcpu entry/exit Christoffer Dall
2017-10-10 9:45 ` Marc Zyngier
2017-10-19 8:44 ` Christoffer Dall
2017-09-23 0:42 ` [PATCH v3 19/20] KVM: arm/arm64: Get rid of kvm_timer_flush_hwstate Christoffer Dall
2017-10-10 9:46 ` Marc Zyngier
2017-09-23 0:42 ` [PATCH v3 20/20] KVM: arm/arm64: Rework kvm_timer_should_fire Christoffer Dall
2017-10-10 9:59 ` Marc Zyngier
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20171018113405.GA8900@cbox \
--to=cdall@linaro.org \
--cc=linux-arm-kernel@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).