* [PATCH v4 0/2] WFE for long delays
@ 2017-10-13 13:32 Julien Thierry
2017-10-13 13:32 ` [PATCH v4 1/2] arm_arch_timer: Expose event stream status Julien Thierry
2017-10-13 13:32 ` [PATCH v4 2/2] arm64: use WFE for long delays Julien Thierry
0 siblings, 2 replies; 4+ messages in thread
From: Julien Thierry @ 2017-10-13 13:32 UTC (permalink / raw)
To: linux-arm-kernel
It was pointed out that there are some windows where the event stream might not
be configured, meaning we cannot safely use WFE (e.g. when using PSCI
CPU_SUSPEND for idle, PSCI might reset CPU state).
The first patch provides a function to check whether the task is in a state where
it can expect events from the event stream.
Second patch actually makes use of WFE in delays.
Changes since v1:
* Keep track of event stream state
* Do not use WFE when event stream might be misconfigured
Changes since v2:
* Use cpumask to track event stream state instead of percpu variable
* Tidy a bit code in arm64/lib/delay
* Do not factor asm-generic/delay code
Changes since v3:
* Use static initialiser for event stream cpumask
* Remove double negation when checking stream availability
* Use raw_smp_processor_id instead of smp_processor_id
* Fix typo
* Change macro argument to lower case
Cheers,
Julien Thierry (2):
arm_arch_timer: Expose event stream status
arm64: use WFE for long delays
arch/arm/include/asm/arch_timer.h | 1 +
arch/arm64/include/asm/arch_timer.h | 1 +
arch/arm64/lib/delay.c | 23 +++++++++++++++++++----
drivers/clocksource/arm_arch_timer.c | 25 ++++++++++++++++++++++---
include/clocksource/arm_arch_timer.h | 10 +++++++++-
5 files changed, 52 insertions(+), 8 deletions(-)
--
1.9.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v4 1/2] arm_arch_timer: Expose event stream status
2017-10-13 13:32 [PATCH v4 0/2] WFE for long delays Julien Thierry
@ 2017-10-13 13:32 ` Julien Thierry
2017-10-13 17:51 ` Mark Rutland
2017-10-13 13:32 ` [PATCH v4 2/2] arm64: use WFE for long delays Julien Thierry
1 sibling, 1 reply; 4+ messages in thread
From: Julien Thierry @ 2017-10-13 13:32 UTC (permalink / raw)
To: linux-arm-kernel
The arch timer configuration for a CPU might get reset after suspending
said CPU.
In order to reliably use the event stream in the kernel (e.g. for delays),
we keep track of the state where we can safely consider the event stream as
properly configured.
Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
---
arch/arm/include/asm/arch_timer.h | 1 +
arch/arm64/include/asm/arch_timer.h | 1 +
drivers/clocksource/arm_arch_timer.c | 25 ++++++++++++++++++++++---
include/clocksource/arm_arch_timer.h | 6 ++++++
4 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index d4ebf56..0b6e104 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -106,6 +106,7 @@ static inline u32 arch_timer_get_cntkctl(void)
static inline void arch_timer_set_cntkctl(u32 cntkctl)
{
asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl));
+ isb();
}
#endif
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index a652ce0..bdedd8f 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -144,6 +144,7 @@ static inline u32 arch_timer_get_cntkctl(void)
static inline void arch_timer_set_cntkctl(u32 cntkctl)
{
write_sysreg(cntkctl, cntkctl_el1);
+ isb();
}
static inline u64 arch_counter_get_cntpct(void)
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index fd4b7f6..13e6baa 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -77,6 +77,7 @@ struct arch_timer {
static bool arch_counter_suspend_stop;
static bool vdso_default = true;
+static cpumask_t evtstrm_available = CPU_MASK_NONE;
static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
static int __init early_evtstrm_cfg(char *buf)
@@ -740,6 +741,7 @@ static void arch_timer_evtstrm_enable(int divider)
#ifdef CONFIG_COMPAT
compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
#endif
+ cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
}
static void arch_timer_configure_evtstream(void)
@@ -864,6 +866,16 @@ u32 arch_timer_get_rate(void)
return arch_timer_rate;
}
+bool arch_timer_evtstrm_available(void)
+{
+ /*
+ * We might get called from a preemptible context. This is fine
+ * because availability of the event stream should be always the same
+ * for a preemptible context and context where we might resume a task.
+ */
+ return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
+}
+
static u64 arch_counter_get_cntvct_mem(void)
{
u32 vct_lo, vct_hi, tmp_hi;
@@ -929,6 +941,8 @@ static int arch_timer_dying_cpu(unsigned int cpu)
{
struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
+ cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
+
arch_timer_stop(clk);
return 0;
}
@@ -938,10 +952,16 @@ static int arch_timer_dying_cpu(unsigned int cpu)
static int arch_timer_cpu_pm_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
- if (action == CPU_PM_ENTER)
+ if (action == CPU_PM_ENTER) {
__this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl());
- else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT)
+
+ cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
+ } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
+
+ if (elf_hwcap & HWCAP_EVTSTRM)
+ cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
+ }
return NOTIFY_OK;
}
@@ -1017,7 +1037,6 @@ static int __init arch_timer_register(void)
if (err)
goto out_unreg_notify;
-
/* Register and immediately configure the timer on the boot CPU */
err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING,
"clockevents/arm/arch_timer:starting",
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index cc805b7..4e28283 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -93,6 +93,7 @@ struct arch_timer_mem {
extern u32 arch_timer_get_rate(void);
extern u64 (*arch_timer_read_counter)(void);
extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
+extern bool arch_timer_evtstrm_available(void);
#else
@@ -106,6 +107,11 @@ static inline u64 arch_timer_read_counter(void)
return 0;
}
+static inline bool arch_timer_evtstrm_available(void)
+{
+ return false;
+}
+
#endif
#endif
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v4 2/2] arm64: use WFE for long delays
2017-10-13 13:32 [PATCH v4 0/2] WFE for long delays Julien Thierry
2017-10-13 13:32 ` [PATCH v4 1/2] arm_arch_timer: Expose event stream status Julien Thierry
@ 2017-10-13 13:32 ` Julien Thierry
1 sibling, 0 replies; 4+ messages in thread
From: Julien Thierry @ 2017-10-13 13:32 UTC (permalink / raw)
To: linux-arm-kernel
The current delay implementation uses the yield instruction, which is a
hint that it is beneficial to schedule another thread. As this is a hint,
it may be implemented as a NOP, causing all delays to be busy loops. This
is the case for many existing CPUs.
Taking advantage of the generic timer sending periodic events to all
cores, we can use WFE during delays to reduce power consumption. This is
beneficial only for delays longer than the period of the timer event
stream.
If the timer event stream is not enabled, delays will behave as yield/busy
loops.
Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
---
arch/arm64/lib/delay.c | 23 +++++++++++++++++++----
include/clocksource/arm_arch_timer.h | 4 +++-
2 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/lib/delay.c b/arch/arm64/lib/delay.c
index dad4ec9..e48ac40 100644
--- a/arch/arm64/lib/delay.c
+++ b/arch/arm64/lib/delay.c
@@ -24,10 +24,28 @@
#include <linux/module.h>
#include <linux/timex.h>
+#include <clocksource/arm_arch_timer.h>
+
+#define USECS_TO_CYCLES(time_usecs) \
+ xloops_to_cycles((time_usecs) * 0x10C7UL)
+
+static inline unsigned long xloops_to_cycles(unsigned long xloops)
+{
+ return (xloops * loops_per_jiffy * HZ) >> 32;
+}
+
void __delay(unsigned long cycles)
{
cycles_t start = get_cycles();
+ if (arch_timer_evtstrm_available()) {
+ const cycles_t timer_evt_period =
+ USECS_TO_CYCLES(ARCH_TIMER_EVT_STREAM_PERIOD_US);
+
+ while ((get_cycles() - start + timer_evt_period) < cycles)
+ wfe();
+ }
+
while ((get_cycles() - start) < cycles)
cpu_relax();
}
@@ -35,10 +53,7 @@ void __delay(unsigned long cycles)
inline void __const_udelay(unsigned long xloops)
{
- unsigned long loops;
-
- loops = xloops * loops_per_jiffy * HZ;
- __delay(loops >> 32);
+ __delay(xloops_to_cycles(xloops));
}
EXPORT_SYMBOL(__const_udelay);
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 4e28283..349e595 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -67,7 +67,9 @@ enum arch_timer_spi_nr {
#define ARCH_TIMER_USR_VT_ACCESS_EN (1 << 8) /* virtual timer registers */
#define ARCH_TIMER_USR_PT_ACCESS_EN (1 << 9) /* physical timer registers */
-#define ARCH_TIMER_EVT_STREAM_FREQ 10000 /* 100us */
+#define ARCH_TIMER_EVT_STREAM_PERIOD_US 100
+#define ARCH_TIMER_EVT_STREAM_FREQ \
+ (USEC_PER_SEC / ARCH_TIMER_EVT_STREAM_PERIOD_US)
struct arch_timer_kvm_info {
struct timecounter timecounter;
--
1.9.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v4 1/2] arm_arch_timer: Expose event stream status
2017-10-13 13:32 ` [PATCH v4 1/2] arm_arch_timer: Expose event stream status Julien Thierry
@ 2017-10-13 17:51 ` Mark Rutland
0 siblings, 0 replies; 4+ messages in thread
From: Mark Rutland @ 2017-10-13 17:51 UTC (permalink / raw)
To: linux-arm-kernel
On Fri, Oct 13, 2017 at 02:32:55PM +0100, Julien Thierry wrote:
> The arch timer configuration for a CPU might get reset after suspending
> said CPU.
>
> In order to reliably use the event stream in the kernel (e.g. for delays),
> we keep track of the state where we can safely consider the event stream as
> properly configured.
>
> Signed-off-by: Julien Thierry <julien.thierry@arm.com>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Marc Zyngier <marc.zyngier@arm.com>
> Cc: Russell King <linux@armlinux.org.uk>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
It would be nice to call out that the ISBs are necessary to ensure that the
event stream is active before we make use of it in the kernel.
Previously that only mattered for userspace, and the exception return
was sufficient.
I think that can be fixed up when applying this, so:
Acked-by: Mark Rutland <mark.rutland@arm.com>
Thanks,
Mark.
> ---
> arch/arm/include/asm/arch_timer.h | 1 +
> arch/arm64/include/asm/arch_timer.h | 1 +
> drivers/clocksource/arm_arch_timer.c | 25 ++++++++++++++++++++++---
> include/clocksource/arm_arch_timer.h | 6 ++++++
> 4 files changed, 30 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
> index d4ebf56..0b6e104 100644
> --- a/arch/arm/include/asm/arch_timer.h
> +++ b/arch/arm/include/asm/arch_timer.h
> @@ -106,6 +106,7 @@ static inline u32 arch_timer_get_cntkctl(void)
> static inline void arch_timer_set_cntkctl(u32 cntkctl)
> {
> asm volatile("mcr p15, 0, %0, c14, c1, 0" : : "r" (cntkctl));
> + isb();
> }
>
> #endif
> diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
> index a652ce0..bdedd8f 100644
> --- a/arch/arm64/include/asm/arch_timer.h
> +++ b/arch/arm64/include/asm/arch_timer.h
> @@ -144,6 +144,7 @@ static inline u32 arch_timer_get_cntkctl(void)
> static inline void arch_timer_set_cntkctl(u32 cntkctl)
> {
> write_sysreg(cntkctl, cntkctl_el1);
> + isb();
> }
>
> static inline u64 arch_counter_get_cntpct(void)
> diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
> index fd4b7f6..13e6baa 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -77,6 +77,7 @@ struct arch_timer {
> static bool arch_counter_suspend_stop;
> static bool vdso_default = true;
>
> +static cpumask_t evtstrm_available = CPU_MASK_NONE;
> static bool evtstrm_enable = IS_ENABLED(CONFIG_ARM_ARCH_TIMER_EVTSTREAM);
>
> static int __init early_evtstrm_cfg(char *buf)
> @@ -740,6 +741,7 @@ static void arch_timer_evtstrm_enable(int divider)
> #ifdef CONFIG_COMPAT
> compat_elf_hwcap |= COMPAT_HWCAP_EVTSTRM;
> #endif
> + cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
> }
>
> static void arch_timer_configure_evtstream(void)
> @@ -864,6 +866,16 @@ u32 arch_timer_get_rate(void)
> return arch_timer_rate;
> }
>
> +bool arch_timer_evtstrm_available(void)
> +{
> + /*
> + * We might get called from a preemptible context. This is fine
> + * because availability of the event stream should be always the same
> + * for a preemptible context and context where we might resume a task.
> + */
> + return cpumask_test_cpu(raw_smp_processor_id(), &evtstrm_available);
> +}
> +
> static u64 arch_counter_get_cntvct_mem(void)
> {
> u32 vct_lo, vct_hi, tmp_hi;
> @@ -929,6 +941,8 @@ static int arch_timer_dying_cpu(unsigned int cpu)
> {
> struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
>
> + cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
> +
> arch_timer_stop(clk);
> return 0;
> }
> @@ -938,10 +952,16 @@ static int arch_timer_dying_cpu(unsigned int cpu)
> static int arch_timer_cpu_pm_notify(struct notifier_block *self,
> unsigned long action, void *hcpu)
> {
> - if (action == CPU_PM_ENTER)
> + if (action == CPU_PM_ENTER) {
> __this_cpu_write(saved_cntkctl, arch_timer_get_cntkctl());
> - else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT)
> +
> + cpumask_clear_cpu(smp_processor_id(), &evtstrm_available);
> + } else if (action == CPU_PM_ENTER_FAILED || action == CPU_PM_EXIT) {
> arch_timer_set_cntkctl(__this_cpu_read(saved_cntkctl));
> +
> + if (elf_hwcap & HWCAP_EVTSTRM)
> + cpumask_set_cpu(smp_processor_id(), &evtstrm_available);
> + }
> return NOTIFY_OK;
> }
>
> @@ -1017,7 +1037,6 @@ static int __init arch_timer_register(void)
> if (err)
> goto out_unreg_notify;
>
> -
> /* Register and immediately configure the timer on the boot CPU */
> err = cpuhp_setup_state(CPUHP_AP_ARM_ARCH_TIMER_STARTING,
> "clockevents/arm/arch_timer:starting",
> diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
> index cc805b7..4e28283 100644
> --- a/include/clocksource/arm_arch_timer.h
> +++ b/include/clocksource/arm_arch_timer.h
> @@ -93,6 +93,7 @@ struct arch_timer_mem {
> extern u32 arch_timer_get_rate(void);
> extern u64 (*arch_timer_read_counter)(void);
> extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
> +extern bool arch_timer_evtstrm_available(void);
>
> #else
>
> @@ -106,6 +107,11 @@ static inline u64 arch_timer_read_counter(void)
> return 0;
> }
>
> +static inline bool arch_timer_evtstrm_available(void)
> +{
> + return false;
> +}
> +
> #endif
>
> #endif
> --
> 1.9.1
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2017-10-13 17:51 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-10-13 13:32 [PATCH v4 0/2] WFE for long delays Julien Thierry
2017-10-13 13:32 ` [PATCH v4 1/2] arm_arch_timer: Expose event stream status Julien Thierry
2017-10-13 17:51 ` Mark Rutland
2017-10-13 13:32 ` [PATCH v4 2/2] arm64: use WFE for long delays Julien Thierry
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox