* [PATCH v6 04/10] cpuidle-haltpoll: define arch_haltpoll_want()
2024-07-26 20:21 ` [PATCH v6 03/10] Kconfig: move ARCH_HAS_OPTIMIZED_POLL to arch/Kconfig Ankur Arora
@ 2024-07-26 20:21 ` Ankur Arora
2024-07-26 20:21 ` [PATCH v6 05/10] governors/haltpoll: drop kvm_para_available() check Ankur Arora
` (5 subsequent siblings)
6 siblings, 0 replies; 27+ messages in thread
From: Ankur Arora @ 2024-07-26 20:21 UTC (permalink / raw)
To: linux-pm, kvm, linux-arm-kernel, linux-kernel
Cc: catalin.marinas, will, tglx, mingo, bp, dave.hansen, x86, hpa,
pbonzini, wanpengli, vkuznets, rafael, daniel.lezcano, peterz,
arnd, lenb, mark.rutland, harisokn, mtosatti, sudeep.holla, cl,
misono.tomohiro, joao.m.martins, boris.ostrovsky, konrad.wilk,
ankur.a.arora
From: Joao Martins <joao.m.martins@oracle.com>
kvm_para_has_hint(KVM_HINTS_REALTIME) is defined only on x86. In
pursuit of making cpuidle-haltpoll architecture independent, define
arch_haltpoll_want() which handles the architectural checks for
enabling haltpoll.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Mihai Carabas <mihai.carabas@oracle.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
arch/x86/include/asm/cpuidle_haltpoll.h | 1 +
arch/x86/kernel/kvm.c | 13 +++++++++++++
drivers/cpuidle/cpuidle-haltpoll.c | 12 +-----------
include/linux/cpuidle_haltpoll.h | 5 +++++
4 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h
index c8b39c6716ff..8a0a12769c2e 100644
--- a/arch/x86/include/asm/cpuidle_haltpoll.h
+++ b/arch/x86/include/asm/cpuidle_haltpoll.h
@@ -4,5 +4,6 @@
void arch_haltpoll_enable(unsigned int cpu);
void arch_haltpoll_disable(unsigned int cpu);
+bool arch_haltpoll_want(bool force);
#endif
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 263f8aed4e2c..63710cb1aa63 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -1151,4 +1151,17 @@ void arch_haltpoll_disable(unsigned int cpu)
smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
+
+bool arch_haltpoll_want(bool force)
+{
+ /* Do not load haltpoll if idle= is passed */
+ if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+ return false;
+
+ if (!kvm_para_available())
+ return false;
+
+ return kvm_para_has_hint(KVM_HINTS_REALTIME) || force;
+}
+EXPORT_SYMBOL_GPL(arch_haltpoll_want);
#endif
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index d8515d5c0853..d47906632ce3 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -15,7 +15,6 @@
#include <linux/cpuidle.h>
#include <linux/module.h>
#include <linux/sched/idle.h>
-#include <linux/kvm_para.h>
#include <linux/cpuidle_haltpoll.h>
static bool force __read_mostly;
@@ -93,21 +92,12 @@ static void haltpoll_uninit(void)
haltpoll_cpuidle_devices = NULL;
}
-static bool haltpoll_want(void)
-{
- return kvm_para_has_hint(KVM_HINTS_REALTIME) || force;
-}
-
static int __init haltpoll_init(void)
{
int ret;
struct cpuidle_driver *drv = &haltpoll_driver;
- /* Do not load haltpoll if idle= is passed */
- if (boot_option_idle_override != IDLE_NO_OVERRIDE)
- return -ENODEV;
-
- if (!kvm_para_available() || !haltpoll_want())
+ if (!arch_haltpoll_want(force))
return -ENODEV;
cpuidle_poll_state_init(drv);
diff --git a/include/linux/cpuidle_haltpoll.h b/include/linux/cpuidle_haltpoll.h
index d50c1e0411a2..68eb7a757120 100644
--- a/include/linux/cpuidle_haltpoll.h
+++ b/include/linux/cpuidle_haltpoll.h
@@ -12,5 +12,10 @@ static inline void arch_haltpoll_enable(unsigned int cpu)
static inline void arch_haltpoll_disable(unsigned int cpu)
{
}
+
+static inline bool arch_haltpoll_want(bool force)
+{
+ return false;
+}
#endif
#endif
--
2.43.5
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH v6 05/10] governors/haltpoll: drop kvm_para_available() check
2024-07-26 20:21 ` [PATCH v6 03/10] Kconfig: move ARCH_HAS_OPTIMIZED_POLL to arch/Kconfig Ankur Arora
2024-07-26 20:21 ` [PATCH v6 04/10] cpuidle-haltpoll: define arch_haltpoll_want() Ankur Arora
@ 2024-07-26 20:21 ` Ankur Arora
2024-07-26 20:21 ` [PATCH v6 06/10] cpuidle-haltpoll: condition on ARCH_CPUIDLE_HALTPOLL Ankur Arora
` (4 subsequent siblings)
6 siblings, 0 replies; 27+ messages in thread
From: Ankur Arora @ 2024-07-26 20:21 UTC (permalink / raw)
To: linux-pm, kvm, linux-arm-kernel, linux-kernel
Cc: catalin.marinas, will, tglx, mingo, bp, dave.hansen, x86, hpa,
pbonzini, wanpengli, vkuznets, rafael, daniel.lezcano, peterz,
arnd, lenb, mark.rutland, harisokn, mtosatti, sudeep.holla, cl,
misono.tomohiro, joao.m.martins, boris.ostrovsky, konrad.wilk,
ankur.a.arora
From: Joao Martins <joao.m.martins@oracle.com>
The haltpoll governor is selected either by the cpuidle-haltpoll
driver, or explicitly by the user.
In particular, it is never selected by default since it has the lowest
rating of all governors (menu=20, teo=19, ladder=10/25, haltpoll=9).
So, we can safely forgo the kvm_para_available() check. This also
allows cpuidle-haltpoll to be tested on baremetal.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Mihai Carabas <mihai.carabas@oracle.com>
Acked-by: "Rafael J. Wysocki" <rafael@kernel.org>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
drivers/cpuidle/governors/haltpoll.c | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
index 663b7f164d20..c8752f793e61 100644
--- a/drivers/cpuidle/governors/haltpoll.c
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -18,7 +18,6 @@
#include <linux/tick.h>
#include <linux/sched.h>
#include <linux/module.h>
-#include <linux/kvm_para.h>
#include <trace/events/power.h>
static unsigned int guest_halt_poll_ns __read_mostly = 200000;
@@ -148,10 +147,7 @@ static struct cpuidle_governor haltpoll_governor = {
static int __init init_haltpoll(void)
{
- if (kvm_para_available())
- return cpuidle_register_governor(&haltpoll_governor);
-
- return 0;
+ return cpuidle_register_governor(&haltpoll_governor);
}
postcore_initcall(init_haltpoll);
--
2.43.5
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH v6 06/10] cpuidle-haltpoll: condition on ARCH_CPUIDLE_HALTPOLL
2024-07-26 20:21 ` [PATCH v6 03/10] Kconfig: move ARCH_HAS_OPTIMIZED_POLL to arch/Kconfig Ankur Arora
2024-07-26 20:21 ` [PATCH v6 04/10] cpuidle-haltpoll: define arch_haltpoll_want() Ankur Arora
2024-07-26 20:21 ` [PATCH v6 05/10] governors/haltpoll: drop kvm_para_available() check Ankur Arora
@ 2024-07-26 20:21 ` Ankur Arora
2024-07-26 20:21 ` [PATCH v6 07/10] arm64: define TIF_POLLING_NRFLAG Ankur Arora
` (3 subsequent siblings)
6 siblings, 0 replies; 27+ messages in thread
From: Ankur Arora @ 2024-07-26 20:21 UTC (permalink / raw)
To: linux-pm, kvm, linux-arm-kernel, linux-kernel
Cc: catalin.marinas, will, tglx, mingo, bp, dave.hansen, x86, hpa,
pbonzini, wanpengli, vkuznets, rafael, daniel.lezcano, peterz,
arnd, lenb, mark.rutland, harisokn, mtosatti, sudeep.holla, cl,
misono.tomohiro, joao.m.martins, boris.ostrovsky, konrad.wilk,
ankur.a.arora
The cpuidle-haltpoll driver and its namesake governor are selected
under KVM_GUEST on X86. KVM_GUEST in turn selects ARCH_CPUIDLE_HALTPOLL
and defines the requisite arch_haltpoll_{enable,disable}() functions.
So remove the explicit dependence of HALTPOLL_CPUIDLE on KVM_GUEST,
and instead use ARCH_CPUIDLE_HALTPOLL as proxy for architectural
support for haltpoll.
Also change "halt poll" to "haltpoll" in one of the summary clauses,
since the second form is used everywhere else.
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
arch/x86/Kconfig | 1 +
drivers/cpuidle/Kconfig | 5 ++---
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index efe59741dc47..153535e6f55d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -838,6 +838,7 @@ config KVM_GUEST
config ARCH_CPUIDLE_HALTPOLL
def_bool n
+ depends on KVM_GUEST
prompt "Disable host haltpoll when loading haltpoll driver"
help
If virtualized under KVM, disable host haltpoll.
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index 75f6e176bbc8..c1bebadf22bc 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -35,7 +35,6 @@ config CPU_IDLE_GOV_TEO
config CPU_IDLE_GOV_HALTPOLL
bool "Haltpoll governor (for virtualized systems)"
- depends on KVM_GUEST
help
This governor implements haltpoll idle state selection, to be
used in conjunction with the haltpoll cpuidle driver, allowing
@@ -72,8 +71,8 @@ source "drivers/cpuidle/Kconfig.riscv"
endmenu
config HALTPOLL_CPUIDLE
- tristate "Halt poll cpuidle driver"
- depends on X86 && KVM_GUEST && ARCH_HAS_OPTIMIZED_POLL
+ tristate "Haltpoll cpuidle driver"
+ depends on ARCH_CPUIDLE_HALTPOLL && ARCH_HAS_OPTIMIZED_POLL
select CPU_IDLE_GOV_HALTPOLL
default y
help
--
2.43.5
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH v6 07/10] arm64: define TIF_POLLING_NRFLAG
2024-07-26 20:21 ` [PATCH v6 03/10] Kconfig: move ARCH_HAS_OPTIMIZED_POLL to arch/Kconfig Ankur Arora
` (2 preceding siblings ...)
2024-07-26 20:21 ` [PATCH v6 06/10] cpuidle-haltpoll: condition on ARCH_CPUIDLE_HALTPOLL Ankur Arora
@ 2024-07-26 20:21 ` Ankur Arora
2024-07-26 20:21 ` [PATCH v6 08/10] arm64: idle: export arch_cpu_idle Ankur Arora
` (2 subsequent siblings)
6 siblings, 0 replies; 27+ messages in thread
From: Ankur Arora @ 2024-07-26 20:21 UTC (permalink / raw)
To: linux-pm, kvm, linux-arm-kernel, linux-kernel
Cc: catalin.marinas, will, tglx, mingo, bp, dave.hansen, x86, hpa,
pbonzini, wanpengli, vkuznets, rafael, daniel.lezcano, peterz,
arnd, lenb, mark.rutland, harisokn, mtosatti, sudeep.holla, cl,
misono.tomohiro, joao.m.martins, boris.ostrovsky, konrad.wilk,
ankur.a.arora
From: Joao Martins <joao.m.martins@oracle.com>
Commit 842514849a61 ("arm64: Remove TIF_POLLING_NRFLAG") had removed
TIF_POLLING_NRFLAG because arm64 only supported non-polled idling via
cpu_do_idle().
To add support for polling via cpuidle-haltpoll, we want to use the
standard poll_idle() interface, which sets TIF_POLLING_NRFLAG while
polling.
Reuse the same bit to define TIF_POLLING_NRFLAG.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Mihai Carabas <mihai.carabas@oracle.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
arch/arm64/include/asm/thread_info.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e72a3bf9e563..23ff72168e48 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -69,6 +69,7 @@ void arch_setup_new_exec(void);
#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */
#define TIF_SECCOMP 11 /* syscall secure computing */
#define TIF_SYSCALL_EMU 12 /* syscall emulation active */
+#define TIF_POLLING_NRFLAG 16 /* set while polling in poll_idle() */
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19
#define TIF_RESTORE_SIGMASK 20
@@ -91,6 +92,7 @@ void arch_setup_new_exec(void);
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
+#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_32BIT (1 << TIF_32BIT)
--
2.43.5
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH v6 08/10] arm64: idle: export arch_cpu_idle
2024-07-26 20:21 ` [PATCH v6 03/10] Kconfig: move ARCH_HAS_OPTIMIZED_POLL to arch/Kconfig Ankur Arora
` (3 preceding siblings ...)
2024-07-26 20:21 ` [PATCH v6 07/10] arm64: define TIF_POLLING_NRFLAG Ankur Arora
@ 2024-07-26 20:21 ` Ankur Arora
2024-07-26 20:21 ` [PATCH v6 09/10] arm64: support cpuidle-haltpoll Ankur Arora
2024-07-26 20:21 ` [PATCH v6 10/10] cpuidle/poll_state: limit POLL_IDLE_RELAX_COUNT on arm64 Ankur Arora
6 siblings, 0 replies; 27+ messages in thread
From: Ankur Arora @ 2024-07-26 20:21 UTC (permalink / raw)
To: linux-pm, kvm, linux-arm-kernel, linux-kernel
Cc: catalin.marinas, will, tglx, mingo, bp, dave.hansen, x86, hpa,
pbonzini, wanpengli, vkuznets, rafael, daniel.lezcano, peterz,
arnd, lenb, mark.rutland, harisokn, mtosatti, sudeep.holla, cl,
misono.tomohiro, joao.m.martins, boris.ostrovsky, konrad.wilk,
ankur.a.arora
Needed for cpuidle-haltpoll.
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
arch/arm64/kernel/idle.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c
index 05cfb347ec26..b85ba0df9b02 100644
--- a/arch/arm64/kernel/idle.c
+++ b/arch/arm64/kernel/idle.c
@@ -43,3 +43,4 @@ void __cpuidle arch_cpu_idle(void)
*/
cpu_do_idle();
}
+EXPORT_SYMBOL_GPL(arch_cpu_idle);
--
2.43.5
^ permalink raw reply related [flat|nested] 27+ messages in thread
* [PATCH v6 09/10] arm64: support cpuidle-haltpoll
2024-07-26 20:21 ` [PATCH v6 03/10] Kconfig: move ARCH_HAS_OPTIMIZED_POLL to arch/Kconfig Ankur Arora
` (4 preceding siblings ...)
2024-07-26 20:21 ` [PATCH v6 08/10] arm64: idle: export arch_cpu_idle Ankur Arora
@ 2024-07-26 20:21 ` Ankur Arora
2024-07-29 17:20 ` Okanovic, Haris
` (2 more replies)
2024-07-26 20:21 ` [PATCH v6 10/10] cpuidle/poll_state: limit POLL_IDLE_RELAX_COUNT on arm64 Ankur Arora
6 siblings, 3 replies; 27+ messages in thread
From: Ankur Arora @ 2024-07-26 20:21 UTC (permalink / raw)
To: linux-pm, kvm, linux-arm-kernel, linux-kernel
Cc: catalin.marinas, will, tglx, mingo, bp, dave.hansen, x86, hpa,
pbonzini, wanpengli, vkuznets, rafael, daniel.lezcano, peterz,
arnd, lenb, mark.rutland, harisokn, mtosatti, sudeep.holla, cl,
misono.tomohiro, joao.m.martins, boris.ostrovsky, konrad.wilk,
ankur.a.arora
Add architectural support for cpuidle-haltpoll driver by defining
arch_haltpoll_*().
Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
selected, and given that we have an optimized polling mechanism
in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
a memory region in exclusive state and the WFE waiting for any
stores to it.
In the edge case -- no CPU stores to the waited region and there's no
interrupt -- the event-stream will provide the terminating condition
ensuring we don't wait forever, but because the event-stream runs at
a fixed frequency (configured at 10kHz) we might spend more time in
the polling stage than specified by cpuidle_poll_time().
This would only happen in the last iteration, since overshooting the
poll_limit means the governor moves out of the polling stage.
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
arch/arm64/Kconfig | 10 ++++++++++
arch/arm64/include/asm/cpuidle_haltpoll.h | 9 +++++++++
arch/arm64/kernel/cpuidle.c | 23 +++++++++++++++++++++++
3 files changed, 42 insertions(+)
create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5d91259ee7b5..cf1c6681eb0a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -35,6 +35,7 @@ config ARM64
select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ select ARCH_HAS_OPTIMIZED_POLL
select ARCH_HAS_PTE_DEVMAP
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_HW_PTE_YOUNG
@@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
config ARCH_SUSPEND_POSSIBLE
def_bool y
+config ARCH_CPUIDLE_HALTPOLL
+ bool "Enable selection of the cpuidle-haltpoll driver"
+ default n
+ help
+ cpuidle-haltpoll allows for adaptive polling based on
+ current load before entering the idle state.
+
+ Some virtualized workloads benefit from using it.
+
endmenu # "Power management options"
menu "CPU Power Management"
diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
new file mode 100644
index 000000000000..65f289407a6c
--- /dev/null
+++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_HALTPOLL_H
+#define _ARCH_HALTPOLL_H
+
+static inline void arch_haltpoll_enable(unsigned int cpu) { }
+static inline void arch_haltpoll_disable(unsigned int cpu) { }
+
+bool arch_haltpoll_want(bool force);
+#endif
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index f372295207fb..334df82a0eac 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
lpi->index, state);
}
#endif
+
+#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
+
+#include <asm/cpuidle_haltpoll.h>
+
+bool arch_haltpoll_want(bool force)
+{
+ /*
+ * Enabling haltpoll requires two things:
+ *
+ * - Event stream support to provide a terminating condition to the
+ * WFE in the poll loop.
+ *
+ * - KVM support for arch_haltpoll_enable(), arch_haltpoll_disable().
+ *
+ * Given that the second is missing, allow haltpoll to only be force
+ * loaded.
+ */
+ return (arch_timer_evtstrm_available() && false) || force;
+}
+
+EXPORT_SYMBOL_GPL(arch_haltpoll_want);
+#endif
--
2.43.5
^ permalink raw reply related [flat|nested] 27+ messages in thread
* Re: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
2024-07-26 20:21 ` [PATCH v6 09/10] arm64: support cpuidle-haltpoll Ankur Arora
@ 2024-07-29 17:20 ` Okanovic, Haris
2024-07-29 18:02 ` Ankur Arora
2024-08-06 1:37 ` maobibo
2024-08-09 6:08 ` Tomohiro Misono (Fujitsu)
2 siblings, 1 reply; 27+ messages in thread
From: Okanovic, Haris @ 2024-07-29 17:20 UTC (permalink / raw)
To: linux-arm-kernel@lists.infradead.org, kvm@vger.kernel.org,
linux-pm@vger.kernel.org, linux-kernel@vger.kernel.org,
ankur.a.arora@oracle.com
Cc: joao.m.martins@oracle.com, boris.ostrovsky@oracle.com,
dave.hansen@linux.intel.com, konrad.wilk@oracle.com,
wanpengli@tencent.com, cl@gentwo.org, mingo@redhat.com,
catalin.marinas@arm.com, pbonzini@redhat.com, tglx@linutronix.de,
misono.tomohiro@fujitsu.com, daniel.lezcano@linaro.org,
arnd@arndb.de, lenb@kernel.org, will@kernel.org, hpa@zytor.com,
peterz@infradead.org, vkuznets@redhat.com, bp@alien8.de,
Okanovic, Haris, rafael@kernel.org, sudeep.holla@arm.com,
mtosatti@redhat.com, x86@kernel.org, mark.rutland@arm.com
On Fri, 2024-07-26 at 13:21 -0700, Ankur Arora wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> Add architectural support for cpuidle-haltpoll driver by defining
> arch_haltpoll_*().
>
> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
> selected, and given that we have an optimized polling mechanism
> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>
> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
> a memory region in exclusive state and the WFE waiting for any
> stores to it.
>
> In the edge case -- no CPU stores to the waited region and there's no
> interrupt -- the event-stream will provide the terminating condition
> ensuring we don't wait forever, but because the event-stream runs at
> a fixed frequency (configured at 10kHz) we might spend more time in
> the polling stage than specified by cpuidle_poll_time().
>
> This would only happen in the last iteration, since overshooting the
> poll_limit means the governor moves out of the polling stage.
>
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
> arch/arm64/Kconfig | 10 ++++++++++
> arch/arm64/include/asm/cpuidle_haltpoll.h | 9 +++++++++
> arch/arm64/kernel/cpuidle.c | 23 +++++++++++++++++++++++
> 3 files changed, 42 insertions(+)
> create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5d91259ee7b5..cf1c6681eb0a 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -35,6 +35,7 @@ config ARM64
> select ARCH_HAS_MEMBARRIER_SYNC_CORE
> select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
> select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> + select ARCH_HAS_OPTIMIZED_POLL
> select ARCH_HAS_PTE_DEVMAP
> select ARCH_HAS_PTE_SPECIAL
> select ARCH_HAS_HW_PTE_YOUNG
> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
> config ARCH_SUSPEND_POSSIBLE
> def_bool y
>
> +config ARCH_CPUIDLE_HALTPOLL
> + bool "Enable selection of the cpuidle-haltpoll driver"
> + default n
> + help
> + cpuidle-haltpoll allows for adaptive polling based on
> + current load before entering the idle state.
> +
> + Some virtualized workloads benefit from using it.
> +
> endmenu # "Power management options"
>
> menu "CPU Power Management"
> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> new file mode 100644
> index 000000000000..65f289407a6c
> --- /dev/null
> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> @@ -0,0 +1,9 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ARCH_HALTPOLL_H
> +#define _ARCH_HALTPOLL_H
> +
> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
> +
> +bool arch_haltpoll_want(bool force);
> +#endif
> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
> index f372295207fb..334df82a0eac 100644
> --- a/arch/arm64/kernel/cpuidle.c
> +++ b/arch/arm64/kernel/cpuidle.c
> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
> lpi->index, state);
> }
> #endif
> +
> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
> +
> +#include <asm/cpuidle_haltpoll.h>
> +
> +bool arch_haltpoll_want(bool force)
> +{
> + /*
> + * Enabling haltpoll requires two things:
> + *
> + * - Event stream support to provide a terminating condition to the
> + * WFE in the poll loop.
> + *
> + * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
typo: "arch_haltpoll_enable" and "arch_haltpoll_enable"
> + *
> + * Given that the second is missing, allow haltpoll to only be force
> + * loaded.
> + */
> + return (arch_timer_evtstrm_available() && false) || force;
This should always evaluate false without force. Perhaps you meant
something like this?
```
- return (arch_timer_evtstrm_available() && false) || force;
+ return arch_timer_evtstrm_available() || force;
```
Regards,
Haris Okanovic
> +}
> +
> +EXPORT_SYMBOL_GPL(arch_haltpoll_want);
> +#endif
> --
> 2.43.5
>
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
2024-07-29 17:20 ` Okanovic, Haris
@ 2024-07-29 18:02 ` Ankur Arora
2024-08-13 15:26 ` Okanovic, Haris
0 siblings, 1 reply; 27+ messages in thread
From: Ankur Arora @ 2024-07-29 18:02 UTC (permalink / raw)
To: Okanovic, Haris
Cc: linux-arm-kernel@lists.infradead.org, kvm@vger.kernel.org,
linux-pm@vger.kernel.org, linux-kernel@vger.kernel.org,
ankur.a.arora@oracle.com, joao.m.martins@oracle.com,
boris.ostrovsky@oracle.com, dave.hansen@linux.intel.com,
konrad.wilk@oracle.com, wanpengli@tencent.com, cl@gentwo.org,
mingo@redhat.com, catalin.marinas@arm.com, pbonzini@redhat.com,
tglx@linutronix.de, misono.tomohiro@fujitsu.com,
daniel.lezcano@linaro.org, arnd@arndb.de, lenb@kernel.org,
will@kernel.org, hpa@zytor.com, peterz@infradead.org,
vkuznets@redhat.com, bp@alien8.de, rafael@kernel.org,
sudeep.holla@arm.com, mtosatti@redhat.com, x86@kernel.org,
mark.rutland@arm.com
Okanovic, Haris <harisokn@amazon.com> writes:
> On Fri, 2024-07-26 at 13:21 -0700, Ankur Arora wrote:
>> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>>
>>
>>
>> Add architectural support for cpuidle-haltpoll driver by defining
>> arch_haltpoll_*().
>>
>> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
>> selected, and given that we have an optimized polling mechanism
>> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>>
>> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
>> a memory region in exclusive state and the WFE waiting for any
>> stores to it.
>>
>> In the edge case -- no CPU stores to the waited region and there's no
>> interrupt -- the event-stream will provide the terminating condition
>> ensuring we don't wait forever, but because the event-stream runs at
>> a fixed frequency (configured at 10kHz) we might spend more time in
>> the polling stage than specified by cpuidle_poll_time().
>>
>> This would only happen in the last iteration, since overshooting the
>> poll_limit means the governor moves out of the polling stage.
>>
>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> ---
>> arch/arm64/Kconfig | 10 ++++++++++
>> arch/arm64/include/asm/cpuidle_haltpoll.h | 9 +++++++++
>> arch/arm64/kernel/cpuidle.c | 23 +++++++++++++++++++++++
>> 3 files changed, 42 insertions(+)
>> create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 5d91259ee7b5..cf1c6681eb0a 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -35,6 +35,7 @@ config ARM64
>> select ARCH_HAS_MEMBARRIER_SYNC_CORE
>> select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>> select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>> + select ARCH_HAS_OPTIMIZED_POLL
>> select ARCH_HAS_PTE_DEVMAP
>> select ARCH_HAS_PTE_SPECIAL
>> select ARCH_HAS_HW_PTE_YOUNG
>> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>> config ARCH_SUSPEND_POSSIBLE
>> def_bool y
>>
>> +config ARCH_CPUIDLE_HALTPOLL
>> + bool "Enable selection of the cpuidle-haltpoll driver"
>> + default n
>> + help
>> + cpuidle-haltpoll allows for adaptive polling based on
>> + current load before entering the idle state.
>> +
>> + Some virtualized workloads benefit from using it.
>> +
>> endmenu # "Power management options"
>>
>> menu "CPU Power Management"
>> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> new file mode 100644
>> index 000000000000..65f289407a6c
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> @@ -0,0 +1,9 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef _ARCH_HALTPOLL_H
>> +#define _ARCH_HALTPOLL_H
>> +
>> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
>> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
>> +
>> +bool arch_haltpoll_want(bool force);
>> +#endif
>> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
>> index f372295207fb..334df82a0eac 100644
>> --- a/arch/arm64/kernel/cpuidle.c
>> +++ b/arch/arm64/kernel/cpuidle.c
>> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>> lpi->index, state);
>> }
>> #endif
>> +
>> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
>> +
>> +#include <asm/cpuidle_haltpoll.h>
>> +
>> +bool arch_haltpoll_want(bool force)
>> +{
>> + /*
>> + * Enabling haltpoll requires two things:
>> + *
>> + * - Event stream support to provide a terminating condition to the
>> + * WFE in the poll loop.
>> + *
>> + * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
>
> typo: "arch_haltpoll_enable" and "arch_haltpoll_enable"
>
>> + *
>> + * Given that the second is missing, allow haltpoll to only be force
>> + * loaded.
>> + */
>> + return (arch_timer_evtstrm_available() && false) || force;
>
> This should always evaluate false without force. Perhaps you meant
> something like this?
>
> ```
> - return (arch_timer_evtstrm_available() && false) || force;
> + return arch_timer_evtstrm_available() || force;
> ```
No. This was intentional. As I mention in the comment above, right now
the KVM support is missing. Which means that the guest has no way to
tell the host to not poll as part of host haltpoll.
Until that is available, only allow force loading.
--
ankur
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
2024-07-29 18:02 ` Ankur Arora
@ 2024-08-13 15:26 ` Okanovic, Haris
2024-08-13 18:56 ` Ankur Arora
0 siblings, 1 reply; 27+ messages in thread
From: Okanovic, Haris @ 2024-08-13 15:26 UTC (permalink / raw)
To: ankur.a.arora@oracle.com
Cc: joao.m.martins@oracle.com, kvm@vger.kernel.org,
mark.rutland@arm.com, boris.ostrovsky@oracle.com, x86@kernel.org,
dave.hansen@linux.intel.com, konrad.wilk@oracle.com,
wanpengli@tencent.com, cl@gentwo.org,
linux-kernel@vger.kernel.org, catalin.marinas@arm.com,
mingo@redhat.com, pbonzini@redhat.com, tglx@linutronix.de,
misono.tomohiro@fujitsu.com, daniel.lezcano@linaro.org,
arnd@arndb.de, lenb@kernel.org, will@kernel.org, hpa@zytor.com,
peterz@infradead.org, vkuznets@redhat.com,
linux-arm-kernel@lists.infradead.org, Okanovic, Haris,
linux-pm@vger.kernel.org, bp@alien8.de, mtosatti@redhat.com,
rafael@kernel.org, sudeep.holla@arm.com
On Mon, 2024-07-29 at 11:02 -0700, Ankur Arora wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> Okanovic, Haris <harisokn@amazon.com> writes:
>
> > On Fri, 2024-07-26 at 13:21 -0700, Ankur Arora wrote:
> > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> > >
> > >
> > >
> > > Add architectural support for cpuidle-haltpoll driver by defining
> > > arch_haltpoll_*().
> > >
> > > Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
> > > selected, and given that we have an optimized polling mechanism
> > > in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
> > >
> > > smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
> > > a memory region in exclusive state and the WFE waiting for any
> > > stores to it.
> > >
> > > In the edge case -- no CPU stores to the waited region and there's no
> > > interrupt -- the event-stream will provide the terminating condition
> > > ensuring we don't wait forever, but because the event-stream runs at
> > > a fixed frequency (configured at 10kHz) we might spend more time in
> > > the polling stage than specified by cpuidle_poll_time().
> > >
> > > This would only happen in the last iteration, since overshooting the
> > > poll_limit means the governor moves out of the polling stage.
> > >
> > > Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> > > ---
> > > arch/arm64/Kconfig | 10 ++++++++++
> > > arch/arm64/include/asm/cpuidle_haltpoll.h | 9 +++++++++
> > > arch/arm64/kernel/cpuidle.c | 23 +++++++++++++++++++++++
> > > 3 files changed, 42 insertions(+)
> > > create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
> > >
> > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > > index 5d91259ee7b5..cf1c6681eb0a 100644
> > > --- a/arch/arm64/Kconfig
> > > +++ b/arch/arm64/Kconfig
> > > @@ -35,6 +35,7 @@ config ARM64
> > > select ARCH_HAS_MEMBARRIER_SYNC_CORE
> > > select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
> > > select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> > > + select ARCH_HAS_OPTIMIZED_POLL
> > > select ARCH_HAS_PTE_DEVMAP
> > > select ARCH_HAS_PTE_SPECIAL
> > > select ARCH_HAS_HW_PTE_YOUNG
> > > @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
> > > config ARCH_SUSPEND_POSSIBLE
> > > def_bool y
> > >
> > > +config ARCH_CPUIDLE_HALTPOLL
> > > + bool "Enable selection of the cpuidle-haltpoll driver"
> > > + default n
> > > + help
> > > + cpuidle-haltpoll allows for adaptive polling based on
> > > + current load before entering the idle state.
> > > +
> > > + Some virtualized workloads benefit from using it.
> > > +
> > > endmenu # "Power management options"
> > >
> > > menu "CPU Power Management"
> > > diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> > > new file mode 100644
> > > index 000000000000..65f289407a6c
> > > --- /dev/null
> > > +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> > > @@ -0,0 +1,9 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +#ifndef _ARCH_HALTPOLL_H
> > > +#define _ARCH_HALTPOLL_H
> > > +
> > > +static inline void arch_haltpoll_enable(unsigned int cpu) { }
> > > +static inline void arch_haltpoll_disable(unsigned int cpu) { }
> > > +
> > > +bool arch_haltpoll_want(bool force);
> > > +#endif
> > > diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
> > > index f372295207fb..334df82a0eac 100644
> > > --- a/arch/arm64/kernel/cpuidle.c
> > > +++ b/arch/arm64/kernel/cpuidle.c
> > > @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
> > > lpi->index, state);
> > > }
> > > #endif
> > > +
> > > +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
> > > +
> > > +#include <asm/cpuidle_haltpoll.h>
> > > +
> > > +bool arch_haltpoll_want(bool force)
> > > +{
> > > + /*
> > > + * Enabling haltpoll requires two things:
> > > + *
> > > + * - Event stream support to provide a terminating condition to the
> > > + * WFE in the poll loop.
> > > + *
> > > + * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
> >
> > typo: "arch_haltpoll_enable" and "arch_haltpoll_enable"
> >
> > > + *
> > > + * Given that the second is missing, allow haltpoll to only be force
> > > + * loaded.
> > > + */
> > > + return (arch_timer_evtstrm_available() && false) || force;
> >
> > This should always evaluate false without force. Perhaps you meant
> > something like this?
> >
> > ```
> > - return (arch_timer_evtstrm_available() && false) || force;
> > + return arch_timer_evtstrm_available() || force;
> > ```
>
> No. This was intentional. As I meniton in the comment above, right now
> the KVM support is missing. Which means that the guest has no way to
> tell the host to not poll as part of host haltpoll.
>
> Until that is available, only allow force loading.
I see, arm64's kvm is missing the poll control mechanism.
I'll follow-up your changes with a patch for AWS Graviton; still seeing
the same performance gains.
Tested-by: Haris Okanovic <harisokn@amazon.com>
>
> --
> ankur
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
2024-08-13 15:26 ` Okanovic, Haris
@ 2024-08-13 18:56 ` Ankur Arora
2024-08-13 21:14 ` Okanovic, Haris
0 siblings, 1 reply; 27+ messages in thread
From: Ankur Arora @ 2024-08-13 18:56 UTC (permalink / raw)
To: Okanovic, Haris
Cc: ankur.a.arora@oracle.com, joao.m.martins@oracle.com,
kvm@vger.kernel.org, mark.rutland@arm.com,
boris.ostrovsky@oracle.com, x86@kernel.org,
dave.hansen@linux.intel.com, konrad.wilk@oracle.com,
wanpengli@tencent.com, cl@gentwo.org,
linux-kernel@vger.kernel.org, catalin.marinas@arm.com,
mingo@redhat.com, pbonzini@redhat.com, tglx@linutronix.de,
misono.tomohiro@fujitsu.com, daniel.lezcano@linaro.org,
arnd@arndb.de, lenb@kernel.org, will@kernel.org, hpa@zytor.com,
peterz@infradead.org, vkuznets@redhat.com,
linux-arm-kernel@lists.infradead.org, linux-pm@vger.kernel.org,
bp@alien8.de, mtosatti@redhat.com, rafael@kernel.org,
sudeep.holla@arm.com
Okanovic, Haris <harisokn@amazon.com> writes:
> On Mon, 2024-07-29 at 11:02 -0700, Ankur Arora wrote:
>> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>>
>>
>>
>> Okanovic, Haris <harisokn@amazon.com> writes:
>>
>> > On Fri, 2024-07-26 at 13:21 -0700, Ankur Arora wrote:
>> > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>> > >
>> > >
>> > >
>> > > Add architectural support for cpuidle-haltpoll driver by defining
>> > > arch_haltpoll_*().
>> > >
>> > > Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
>> > > selected, and given that we have an optimized polling mechanism
>> > > in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>> > >
>> > > smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
>> > > a memory region in exclusive state and the WFE waiting for any
>> > > stores to it.
>> > >
>> > > In the edge case -- no CPU stores to the waited region and there's no
>> > > interrupt -- the event-stream will provide the terminating condition
>> > > ensuring we don't wait forever, but because the event-stream runs at
>> > > a fixed frequency (configured at 10kHz) we might spend more time in
>> > > the polling stage than specified by cpuidle_poll_time().
>> > >
>> > > This would only happen in the last iteration, since overshooting the
>> > > poll_limit means the governor moves out of the polling stage.
>> > >
>> > > Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> > > ---
>> > > arch/arm64/Kconfig | 10 ++++++++++
>> > > arch/arm64/include/asm/cpuidle_haltpoll.h | 9 +++++++++
>> > > arch/arm64/kernel/cpuidle.c | 23 +++++++++++++++++++++++
>> > > 3 files changed, 42 insertions(+)
>> > > create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>> > >
>> > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> > > index 5d91259ee7b5..cf1c6681eb0a 100644
>> > > --- a/arch/arm64/Kconfig
>> > > +++ b/arch/arm64/Kconfig
>> > > @@ -35,6 +35,7 @@ config ARM64
>> > > select ARCH_HAS_MEMBARRIER_SYNC_CORE
>> > > select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>> > > select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>> > > + select ARCH_HAS_OPTIMIZED_POLL
>> > > select ARCH_HAS_PTE_DEVMAP
>> > > select ARCH_HAS_PTE_SPECIAL
>> > > select ARCH_HAS_HW_PTE_YOUNG
>> > > @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>> > > config ARCH_SUSPEND_POSSIBLE
>> > > def_bool y
>> > >
>> > > +config ARCH_CPUIDLE_HALTPOLL
>> > > + bool "Enable selection of the cpuidle-haltpoll driver"
>> > > + default n
>> > > + help
>> > > + cpuidle-haltpoll allows for adaptive polling based on
>> > > + current load before entering the idle state.
>> > > +
>> > > + Some virtualized workloads benefit from using it.
>> > > +
>> > > endmenu # "Power management options"
>> > >
>> > > menu "CPU Power Management"
>> > > diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> > > new file mode 100644
>> > > index 000000000000..65f289407a6c
>> > > --- /dev/null
>> > > +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> > > @@ -0,0 +1,9 @@
>> > > +/* SPDX-License-Identifier: GPL-2.0 */
>> > > +#ifndef _ARCH_HALTPOLL_H
>> > > +#define _ARCH_HALTPOLL_H
>> > > +
>> > > +static inline void arch_haltpoll_enable(unsigned int cpu) { }
>> > > +static inline void arch_haltpoll_disable(unsigned int cpu) { }
>> > > +
>> > > +bool arch_haltpoll_want(bool force);
>> > > +#endif
>> > > diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
>> > > index f372295207fb..334df82a0eac 100644
>> > > --- a/arch/arm64/kernel/cpuidle.c
>> > > +++ b/arch/arm64/kernel/cpuidle.c
>> > > @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>> > > lpi->index, state);
>> > > }
>> > > #endif
>> > > +
>> > > +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
>> > > +
>> > > +#include <asm/cpuidle_haltpoll.h>
>> > > +
>> > > +bool arch_haltpoll_want(bool force)
>> > > +{
>> > > + /*
>> > > + * Enabling haltpoll requires two things:
>> > > + *
>> > > + * - Event stream support to provide a terminating condition to the
>> > > + * WFE in the poll loop.
>> > > + *
>> > > + * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
>> >
>> > typo: "arch_haltpoll_enable" and "arch_haltpoll_enable"
>> >
>> > > + *
>> > > + * Given that the second is missing, allow haltpoll to only be force
>> > > + * loaded.
>> > > + */
>> > > + return (arch_timer_evtstrm_available() && false) || force;
>> >
>> > This should always evaluate false without force. Perhaps you meant
>> > something like this?
>> >
>> > ```
>> > - return (arch_timer_evtstrm_available() && false) || force;
>> > + return arch_timer_evtstrm_available() || force;
>> > ```
>>
>> No. This was intentional. As I mention in the comment above, right now
>> the KVM support is missing. Which means that the guest has no way to
>> tell the host to not poll as part of host haltpoll.
>>
>> Until that is available, only allow force loading.
>
> I see, arm64's kvm is missing the poll control mechanism.
>
> I'll follow-up your changes with a patch for AWS Graviton; still seeing
> the same performance gains.
Excellent. Could you Cc me when you send out your changes?
> Tested-by: Haris Okanovic <harisokn@amazon.com>
Thanks!
--
ankur
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
2024-08-13 18:56 ` Ankur Arora
@ 2024-08-13 21:14 ` Okanovic, Haris
0 siblings, 0 replies; 27+ messages in thread
From: Okanovic, Haris @ 2024-08-13 21:14 UTC (permalink / raw)
To: ankur.a.arora@oracle.com
Cc: joao.m.martins@oracle.com, boris.ostrovsky@oracle.com,
kvm@vger.kernel.org, rafael@kernel.org,
dave.hansen@linux.intel.com, konrad.wilk@oracle.com,
cl@gentwo.org, wanpengli@tencent.com,
linux-kernel@vger.kernel.org, catalin.marinas@arm.com,
mingo@redhat.com, pbonzini@redhat.com,
misono.tomohiro@fujitsu.com, tglx@linutronix.de,
daniel.lezcano@linaro.org, arnd@arndb.de, lenb@kernel.org,
will@kernel.org, mark.rutland@arm.com, hpa@zytor.com,
peterz@infradead.org, vkuznets@redhat.com,
linux-arm-kernel@lists.infradead.org, Okanovic, Haris,
linux-pm@vger.kernel.org, bp@alien8.de, mtosatti@redhat.com,
x86@kernel.org, sudeep.holla@arm.com
On Tue, 2024-08-13 at 11:56 -0700, Ankur Arora wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> Okanovic, Haris <harisokn@amazon.com> writes:
>
> > On Mon, 2024-07-29 at 11:02 -0700, Ankur Arora wrote:
> > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> > >
> > >
> > >
> > > Okanovic, Haris <harisokn@amazon.com> writes:
> > >
> > > > On Fri, 2024-07-26 at 13:21 -0700, Ankur Arora wrote:
> > > > > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> > > > >
> > > > >
> > > > >
> > > > > Add architectural support for cpuidle-haltpoll driver by defining
> > > > > arch_haltpoll_*().
> > > > >
> > > > > Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
> > > > > selected, and given that we have an optimized polling mechanism
> > > > > in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
> > > > >
> > > > > smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
> > > > > a memory region in exclusive state and the WFE waiting for any
> > > > > stores to it.
> > > > >
> > > > > In the edge case -- no CPU stores to the waited region and there's no
> > > > > interrupt -- the event-stream will provide the terminating condition
> > > > > ensuring we don't wait forever, but because the event-stream runs at
> > > > > a fixed frequency (configured at 10kHz) we might spend more time in
> > > > > the polling stage than specified by cpuidle_poll_time().
> > > > >
> > > > > This would only happen in the last iteration, since overshooting the
> > > > > poll_limit means the governor moves out of the polling stage.
> > > > >
> > > > > Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> > > > > ---
> > > > > arch/arm64/Kconfig | 10 ++++++++++
> > > > > arch/arm64/include/asm/cpuidle_haltpoll.h | 9 +++++++++
> > > > > arch/arm64/kernel/cpuidle.c | 23 +++++++++++++++++++++++
> > > > > 3 files changed, 42 insertions(+)
> > > > > create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
> > > > >
> > > > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > > > > index 5d91259ee7b5..cf1c6681eb0a 100644
> > > > > --- a/arch/arm64/Kconfig
> > > > > +++ b/arch/arm64/Kconfig
> > > > > @@ -35,6 +35,7 @@ config ARM64
> > > > > select ARCH_HAS_MEMBARRIER_SYNC_CORE
> > > > > select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
> > > > > select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> > > > > + select ARCH_HAS_OPTIMIZED_POLL
> > > > > select ARCH_HAS_PTE_DEVMAP
> > > > > select ARCH_HAS_PTE_SPECIAL
> > > > > select ARCH_HAS_HW_PTE_YOUNG
> > > > > @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
> > > > > config ARCH_SUSPEND_POSSIBLE
> > > > > def_bool y
> > > > >
> > > > > +config ARCH_CPUIDLE_HALTPOLL
> > > > > + bool "Enable selection of the cpuidle-haltpoll driver"
> > > > > + default n
> > > > > + help
> > > > > + cpuidle-haltpoll allows for adaptive polling based on
> > > > > + current load before entering the idle state.
> > > > > +
> > > > > + Some virtualized workloads benefit from using it.
> > > > > +
> > > > > endmenu # "Power management options"
> > > > >
> > > > > menu "CPU Power Management"
> > > > > diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> > > > > new file mode 100644
> > > > > index 000000000000..65f289407a6c
> > > > > --- /dev/null
> > > > > +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> > > > > @@ -0,0 +1,9 @@
> > > > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > > > +#ifndef _ARCH_HALTPOLL_H
> > > > > +#define _ARCH_HALTPOLL_H
> > > > > +
> > > > > +static inline void arch_haltpoll_enable(unsigned int cpu) { }
> > > > > +static inline void arch_haltpoll_disable(unsigned int cpu) { }
> > > > > +
> > > > > +bool arch_haltpoll_want(bool force);
> > > > > +#endif
> > > > > diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
> > > > > index f372295207fb..334df82a0eac 100644
> > > > > --- a/arch/arm64/kernel/cpuidle.c
> > > > > +++ b/arch/arm64/kernel/cpuidle.c
> > > > > @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
> > > > > lpi->index, state);
> > > > > }
> > > > > #endif
> > > > > +
> > > > > +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
> > > > > +
> > > > > +#include <asm/cpuidle_haltpoll.h>
> > > > > +
> > > > > +bool arch_haltpoll_want(bool force)
> > > > > +{
> > > > > + /*
> > > > > + * Enabling haltpoll requires two things:
> > > > > + *
> > > > > + * - Event stream support to provide a terminating condition to the
> > > > > + * WFE in the poll loop.
> > > > > + *
> > > > > + * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
> > > >
> > > > typo: "arch_haltpoll_enable" and "arch_haltpoll_enable"
> > > >
> > > > > + *
> > > > > + * Given that the second is missing, allow haltpoll to only be force
> > > > > + * loaded.
> > > > > + */
> > > > > + return (arch_timer_evtstrm_available() && false) || force;
> > > >
> > > > This should always evaluate false without force. Perhaps you meant
> > > > something like this?
> > > >
> > > > ```
> > > > - return (arch_timer_evtstrm_available() && false) || force;
> > > > + return arch_timer_evtstrm_available() || force;
> > > > ```
> > >
> > > No. This was intentional. As I mention in the comment above, right now
> > > the KVM support is missing. Which means that the guest has no way to
> > > tell the host to not poll as part of host haltpoll.
> > >
> > > Until that is available, only allow force loading.
> >
> > I see, arm64's kvm is missing the poll control mechanism.
> >
> > I'll follow-up your changes with a patch for AWS Graviton; still seeing
> > the same performance gains.
>
> Excellent. Could you Cc me when you send out your changes?
Will do
-- Haris Okanovic
>
> > Tested-by: Haris Okanovic <harisokn@amazon.com>
>
> Thanks!
>
> --
> ankur
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
2024-07-26 20:21 ` [PATCH v6 09/10] arm64: support cpuidle-haltpoll Ankur Arora
2024-07-29 17:20 ` Okanovic, Haris
@ 2024-08-06 1:37 ` maobibo
2024-08-12 22:48 ` Ankur Arora
2024-08-09 6:08 ` Tomohiro Misono (Fujitsu)
2 siblings, 1 reply; 27+ messages in thread
From: maobibo @ 2024-08-06 1:37 UTC (permalink / raw)
To: Ankur Arora, linux-pm, kvm, linux-arm-kernel, linux-kernel
Cc: catalin.marinas, will, tglx, mingo, bp, dave.hansen, x86, hpa,
pbonzini, wanpengli, vkuznets, rafael, daniel.lezcano, peterz,
arnd, lenb, mark.rutland, harisokn, mtosatti, sudeep.holla, cl,
misono.tomohiro, joao.m.martins, boris.ostrovsky, konrad.wilk
On 2024/7/27 上午4:21, Ankur Arora wrote:
> Add architectural support for cpuidle-haltpoll driver by defining
> arch_haltpoll_*().
>
> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
> selected, and given that we have an optimized polling mechanism
> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>
> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
> a memory region in exclusive state and the WFE waiting for any
> stores to it.
>
> In the edge case -- no CPU stores to the waited region and there's no
> interrupt -- the event-stream will provide the terminating condition
> ensuring we don't wait forever, but because the event-stream runs at
> a fixed frequency (configured at 10kHz) we might spend more time in
> the polling stage than specified by cpuidle_poll_time().
>
> This would only happen in the last iteration, since overshooting the
> poll_limit means the governor moves out of the polling stage.
>
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
> arch/arm64/Kconfig | 10 ++++++++++
> arch/arm64/include/asm/cpuidle_haltpoll.h | 9 +++++++++
> arch/arm64/kernel/cpuidle.c | 23 +++++++++++++++++++++++
> 3 files changed, 42 insertions(+)
> create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5d91259ee7b5..cf1c6681eb0a 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -35,6 +35,7 @@ config ARM64
> select ARCH_HAS_MEMBARRIER_SYNC_CORE
> select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
> select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> + select ARCH_HAS_OPTIMIZED_POLL
> select ARCH_HAS_PTE_DEVMAP
> select ARCH_HAS_PTE_SPECIAL
> select ARCH_HAS_HW_PTE_YOUNG
> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
> config ARCH_SUSPEND_POSSIBLE
> def_bool y
>
> +config ARCH_CPUIDLE_HALTPOLL
> + bool "Enable selection of the cpuidle-haltpoll driver"
> + default n
> + help
> + cpuidle-haltpoll allows for adaptive polling based on
> + current load before entering the idle state.
> +
> + Some virtualized workloads benefit from using it.
> +
> endmenu # "Power management options"
>
> menu "CPU Power Management"
> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> new file mode 100644
> index 000000000000..65f289407a6c
> --- /dev/null
> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> @@ -0,0 +1,9 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ARCH_HALTPOLL_H
> +#define _ARCH_HALTPOLL_H
> +
> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
It would be good for guests to support halt polling on more architectures;
LoongArch wants this too if the results are good.
Do we need to disable halt polling on the host hypervisor if the guest also
uses the halt polling idle method?
Regards
Bibo Mao
> +
> +bool arch_haltpoll_want(bool force);
> +#endif
> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
> index f372295207fb..334df82a0eac 100644
> --- a/arch/arm64/kernel/cpuidle.c
> +++ b/arch/arm64/kernel/cpuidle.c
> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
> lpi->index, state);
> }
> #endif
> +
> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
> +
> +#include <asm/cpuidle_haltpoll.h>
> +
> +bool arch_haltpoll_want(bool force)
> +{
> + /*
> + * Enabling haltpoll requires two things:
> + *
> + * - Event stream support to provide a terminating condition to the
> + * WFE in the poll loop.
> + *
> + * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
> + *
> + * Given that the second is missing, allow haltpoll to only be force
> + * loaded.
> + */
> + return (arch_timer_evtstrm_available() && false) || force;
> +}
> +
> +EXPORT_SYMBOL_GPL(arch_haltpoll_want);
> +#endif
>
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
2024-08-06 1:37 ` maobibo
@ 2024-08-12 22:48 ` Ankur Arora
2024-08-13 0:54 ` maobibo
0 siblings, 1 reply; 27+ messages in thread
From: Ankur Arora @ 2024-08-12 22:48 UTC (permalink / raw)
To: maobibo
Cc: Ankur Arora, linux-pm, kvm, linux-arm-kernel, linux-kernel,
catalin.marinas, will, tglx, mingo, bp, dave.hansen, x86, hpa,
pbonzini, wanpengli, vkuznets, rafael, daniel.lezcano, peterz,
arnd, lenb, mark.rutland, harisokn, mtosatti, sudeep.holla, cl,
misono.tomohiro, joao.m.martins, boris.ostrovsky, konrad.wilk
maobibo <maobibo@loongson.cn> writes:
> On 2024/7/27 上午4:21, Ankur Arora wrote:
>> Add architectural support for cpuidle-haltpoll driver by defining
>> arch_haltpoll_*().
>> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
>> selected, and given that we have an optimized polling mechanism
>> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
>> a memory region in exclusive state and the WFE waiting for any
>> stores to it.
>> In the edge case -- no CPU stores to the waited region and there's no
>> interrupt -- the event-stream will provide the terminating condition
>> ensuring we don't wait forever, but because the event-stream runs at
>> a fixed frequency (configured at 10kHz) we might spend more time in
>> the polling stage than specified by cpuidle_poll_time().
>> This would only happen in the last iteration, since overshooting the
>> poll_limit means the governor moves out of the polling stage.
>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> ---
>> arch/arm64/Kconfig | 10 ++++++++++
>> arch/arm64/include/asm/cpuidle_haltpoll.h | 9 +++++++++
>> arch/arm64/kernel/cpuidle.c | 23 +++++++++++++++++++++++
>> 3 files changed, 42 insertions(+)
>> create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index 5d91259ee7b5..cf1c6681eb0a 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -35,6 +35,7 @@ config ARM64
>> select ARCH_HAS_MEMBARRIER_SYNC_CORE
>> select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>> select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>> + select ARCH_HAS_OPTIMIZED_POLL
>> select ARCH_HAS_PTE_DEVMAP
>> select ARCH_HAS_PTE_SPECIAL
>> select ARCH_HAS_HW_PTE_YOUNG
>> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>> config ARCH_SUSPEND_POSSIBLE
>> def_bool y
>> +config ARCH_CPUIDLE_HALTPOLL
>> + bool "Enable selection of the cpuidle-haltpoll driver"
>> + default n
>> + help
>> + cpuidle-haltpoll allows for adaptive polling based on
>> + current load before entering the idle state.
>> +
>> + Some virtualized workloads benefit from using it.
>> +
>> endmenu # "Power management options"
>> menu "CPU Power Management"
>> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> new file mode 100644
>> index 000000000000..65f289407a6c
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
>> @@ -0,0 +1,9 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef _ARCH_HALTPOLL_H
>> +#define _ARCH_HALTPOLL_H
>> +
>> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
>> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
> It is better that guest supports halt poll on more architectures, LoongArch
> wants this if result is good.
>
> Do we need disable halt polling on host hypervisor if guest also uses halt
> polling idle method?
Yes. The intent is to work on that separately from this series. As the comment
below states, until that is available we only allow force loading.
>> +
>> +bool arch_haltpoll_want(bool force);
>> +#endif
>> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
>> index f372295207fb..334df82a0eac 100644
>> --- a/arch/arm64/kernel/cpuidle.c
>> +++ b/arch/arm64/kernel/cpuidle.c
>> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>> lpi->index, state);
>> }
>> #endif
>> +
>> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
>> +
>> +#include <asm/cpuidle_haltpoll.h>
>> +
>> +bool arch_haltpoll_want(bool force)
>> +{
>> + /*
>> + * Enabling haltpoll requires two things:
>> + *
>> + * - Event stream support to provide a terminating condition to the
>> + * WFE in the poll loop.
>> + *
>> + * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
>> + *
>> + * Given that the second is missing, allow haltpoll to only be force
>> + * loaded.
>> + */
>> + return (arch_timer_evtstrm_available() && false) || force;
>> +}
>> +
>> +EXPORT_SYMBOL_GPL(arch_haltpoll_want);
>> +#endif
>>
--
ankur
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
2024-08-12 22:48 ` Ankur Arora
@ 2024-08-13 0:54 ` maobibo
0 siblings, 0 replies; 27+ messages in thread
From: maobibo @ 2024-08-13 0:54 UTC (permalink / raw)
To: Ankur Arora
Cc: linux-pm, kvm, linux-arm-kernel, linux-kernel, catalin.marinas,
will, tglx, mingo, bp, dave.hansen, x86, hpa, pbonzini, wanpengli,
vkuznets, rafael, daniel.lezcano, peterz, arnd, lenb,
mark.rutland, harisokn, mtosatti, sudeep.holla, cl,
misono.tomohiro, joao.m.martins, boris.ostrovsky, konrad.wilk
On 2024/8/13 上午6:48, Ankur Arora wrote:
>
> maobibo <maobibo@loongson.cn> writes:
>
>> On 2024/7/27 上午4:21, Ankur Arora wrote:
>>> Add architectural support for cpuidle-haltpoll driver by defining
>>> arch_haltpoll_*().
>>> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
>>> selected, and given that we have an optimized polling mechanism
>>> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>>> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
>>> a memory region in exclusive state and the WFE waiting for any
>>> stores to it.
>>> In the edge case -- no CPU stores to the waited region and there's no
>>> interrupt -- the event-stream will provide the terminating condition
>>> ensuring we don't wait forever, but because the event-stream runs at
>>> a fixed frequency (configured at 10kHz) we might spend more time in
>>> the polling stage than specified by cpuidle_poll_time().
>>> This would only happen in the last iteration, since overshooting the
>>> poll_limit means the governor moves out of the polling stage.
>>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>>> ---
>>> arch/arm64/Kconfig | 10 ++++++++++
>>> arch/arm64/include/asm/cpuidle_haltpoll.h | 9 +++++++++
>>> arch/arm64/kernel/cpuidle.c | 23 +++++++++++++++++++++++
>>> 3 files changed, 42 insertions(+)
>>> create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>>> index 5d91259ee7b5..cf1c6681eb0a 100644
>>> --- a/arch/arm64/Kconfig
>>> +++ b/arch/arm64/Kconfig
>>> @@ -35,6 +35,7 @@ config ARM64
>>> select ARCH_HAS_MEMBARRIER_SYNC_CORE
>>> select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
>>> select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
>>> + select ARCH_HAS_OPTIMIZED_POLL
>>> select ARCH_HAS_PTE_DEVMAP
>>> select ARCH_HAS_PTE_SPECIAL
>>> select ARCH_HAS_HW_PTE_YOUNG
>>> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
>>> config ARCH_SUSPEND_POSSIBLE
>>> def_bool y
>>> +config ARCH_CPUIDLE_HALTPOLL
>>> + bool "Enable selection of the cpuidle-haltpoll driver"
>>> + default n
>>> + help
>>> + cpuidle-haltpoll allows for adaptive polling based on
>>> + current load before entering the idle state.
>>> +
>>> + Some virtualized workloads benefit from using it.
>>> +
>>> endmenu # "Power management options"
>>> menu "CPU Power Management"
>>> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
>>> new file mode 100644
>>> index 000000000000..65f289407a6c
>>> --- /dev/null
>>> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
>>> @@ -0,0 +1,9 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +#ifndef _ARCH_HALTPOLL_H
>>> +#define _ARCH_HALTPOLL_H
>>> +
>>> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
>>> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
>> It is better that guest supports halt poll on more architectures, LoongArch
>> wants this if result is good.
>>
>> Do we need disable halt polling on host hypervisor if guest also uses halt
>> polling idle method?
>
> Yes. The intent is to work on that separately from this series. As the comment
> below states, until that is available we only allow force loading.
Thanks for your explanation. In our internal tests, it is useful for
LoongArch virtual machines in some scenarios. Later, we want to add
haltpoll support for LoongArch VMs based on your series.
Regards
Bibo Mao
>
>>> +
>>> +bool arch_haltpoll_want(bool force);
>>> +#endif
>>> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
>>> index f372295207fb..334df82a0eac 100644
>>> --- a/arch/arm64/kernel/cpuidle.c
>>> +++ b/arch/arm64/kernel/cpuidle.c
>>> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
>>> lpi->index, state);
>>> }
>>> #endif
>>> +
>>> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
>>> +
>>> +#include <asm/cpuidle_haltpoll.h>
>>> +
>>> +bool arch_haltpoll_want(bool force)
>>> +{
>>> + /*
>>> + * Enabling haltpoll requires two things:
>>> + *
>>> + * - Event stream support to provide a terminating condition to the
>>> + * WFE in the poll loop.
>>> + *
>>> + * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
>>> + *
>>> + * Given that the second is missing, allow haltpoll to only be force
>>> + * loaded.
>>> + */
>>> + return (arch_timer_evtstrm_available() && false) || force;
>>> +}
>>> +
>>> +EXPORT_SYMBOL_GPL(arch_haltpoll_want);
>>> +#endif
>>>
>
>
> --
> ankur
>
^ permalink raw reply [flat|nested] 27+ messages in thread
* RE: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
2024-07-26 20:21 ` [PATCH v6 09/10] arm64: support cpuidle-haltpoll Ankur Arora
2024-07-29 17:20 ` Okanovic, Haris
2024-08-06 1:37 ` maobibo
@ 2024-08-09 6:08 ` Tomohiro Misono (Fujitsu)
2 siblings, 0 replies; 27+ messages in thread
From: Tomohiro Misono (Fujitsu) @ 2024-08-09 6:08 UTC (permalink / raw)
To: 'Ankur Arora', linux-pm@vger.kernel.org,
kvm@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org
Cc: catalin.marinas@arm.com, will@kernel.org, tglx@linutronix.de,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
x86@kernel.org, hpa@zytor.com, pbonzini@redhat.com,
wanpengli@tencent.com, vkuznets@redhat.com, rafael@kernel.org,
daniel.lezcano@linaro.org, peterz@infradead.org, arnd@arndb.de,
lenb@kernel.org, mark.rutland@arm.com, harisokn@amazon.com,
mtosatti@redhat.com, sudeep.holla@arm.com, cl@gentwo.org,
joao.m.martins@oracle.com, boris.ostrovsky@oracle.com,
konrad.wilk@oracle.com
> Subject: [PATCH v6 09/10] arm64: support cpuidle-haltpoll
>
> Add architectural support for cpuidle-haltpoll driver by defining
> arch_haltpoll_*().
>
> Also define ARCH_CPUIDLE_HALTPOLL to allow cpuidle-haltpoll to be
> selected, and given that we have an optimized polling mechanism
> in smp_cond_load*(), select ARCH_HAS_OPTIMIZED_POLL.
>
> smp_cond_load*() are implemented via LDXR, WFE, with LDXR loading
> a memory region in exclusive state and the WFE waiting for any
> stores to it.
>
> In the edge case -- no CPU stores to the waited region and there's no
> interrupt -- the event-stream will provide the terminating condition
> ensuring we don't wait forever, but because the event-stream runs at
> a fixed frequency (configured at 10kHz) we might spend more time in
> the polling stage than specified by cpuidle_poll_time().
>
> This would only happen in the last iteration, since overshooting the
> poll_limit means the governor moves out of the polling stage.
>
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
> arch/arm64/Kconfig | 10 ++++++++++
> arch/arm64/include/asm/cpuidle_haltpoll.h | 9 +++++++++
> arch/arm64/kernel/cpuidle.c | 23 +++++++++++++++++++++++
FYI, arch/arm64/kernel/cpuidle.c was moved to drivers/acpi/arm64/ in 6.11
and therefore I couldn't apply the series to 6.11.
https://github.com/torvalds/linux/commit/99e7a8adc0ca906151f5d70ff68b8a81f53fd106
Regards,
Tomohiro Misono
> 3 files changed, 42 insertions(+)
> create mode 100644 arch/arm64/include/asm/cpuidle_haltpoll.h
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 5d91259ee7b5..cf1c6681eb0a 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -35,6 +35,7 @@ config ARM64
> select ARCH_HAS_MEMBARRIER_SYNC_CORE
> select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
> select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
> + select ARCH_HAS_OPTIMIZED_POLL
> select ARCH_HAS_PTE_DEVMAP
> select ARCH_HAS_PTE_SPECIAL
> select ARCH_HAS_HW_PTE_YOUNG
> @@ -2376,6 +2377,15 @@ config ARCH_HIBERNATION_HEADER
> config ARCH_SUSPEND_POSSIBLE
> def_bool y
>
> +config ARCH_CPUIDLE_HALTPOLL
> + bool "Enable selection of the cpuidle-haltpoll driver"
> + default n
> + help
> + cpuidle-haltpoll allows for adaptive polling based on
> + current load before entering the idle state.
> +
> + Some virtualized workloads benefit from using it.
> +
> endmenu # "Power management options"
>
> menu "CPU Power Management"
> diff --git a/arch/arm64/include/asm/cpuidle_haltpoll.h b/arch/arm64/include/asm/cpuidle_haltpoll.h
> new file mode 100644
> index 000000000000..65f289407a6c
> --- /dev/null
> +++ b/arch/arm64/include/asm/cpuidle_haltpoll.h
> @@ -0,0 +1,9 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ARCH_HALTPOLL_H
> +#define _ARCH_HALTPOLL_H
> +
> +static inline void arch_haltpoll_enable(unsigned int cpu) { }
> +static inline void arch_haltpoll_disable(unsigned int cpu) { }
> +
> +bool arch_haltpoll_want(bool force);
> +#endif
> diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
> index f372295207fb..334df82a0eac 100644
> --- a/arch/arm64/kernel/cpuidle.c
> +++ b/arch/arm64/kernel/cpuidle.c
> @@ -72,3 +72,26 @@ __cpuidle int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
> lpi->index, state);
> }
> #endif
> +
> +#if IS_ENABLED(CONFIG_HALTPOLL_CPUIDLE)
> +
> +#include <asm/cpuidle_haltpoll.h>
> +
> +bool arch_haltpoll_want(bool force)
> +{
> + /*
> + * Enabling haltpoll requires two things:
> + *
> + * - Event stream support to provide a terminating condition to the
> + * WFE in the poll loop.
> + *
> + * - KVM support for arch_haltpoll_enable(), arch_haltpoll_enable().
> + *
> + * Given that the second is missing, allow haltpoll to only be force
> + * loaded.
> + */
> + return (arch_timer_evtstrm_available() && false) || force;
> +}
> +
> +EXPORT_SYMBOL_GPL(arch_haltpoll_want);
> +#endif
> --
> 2.43.5
^ permalink raw reply [flat|nested] 27+ messages in thread
* [PATCH v6 10/10] cpuidle/poll_state: limit POLL_IDLE_RELAX_COUNT on arm64
2024-07-26 20:21 ` [PATCH v6 03/10] Kconfig: move ARCH_HAS_OPTIMIZED_POLL to arch/Kconfig Ankur Arora
` (5 preceding siblings ...)
2024-07-26 20:21 ` [PATCH v6 09/10] arm64: support cpuidle-haltpoll Ankur Arora
@ 2024-07-26 20:21 ` Ankur Arora
6 siblings, 0 replies; 27+ messages in thread
From: Ankur Arora @ 2024-07-26 20:21 UTC (permalink / raw)
To: linux-pm, kvm, linux-arm-kernel, linux-kernel
Cc: catalin.marinas, will, tglx, mingo, bp, dave.hansen, x86, hpa,
pbonzini, wanpengli, vkuznets, rafael, daniel.lezcano, peterz,
arnd, lenb, mark.rutland, harisokn, mtosatti, sudeep.holla, cl,
misono.tomohiro, joao.m.martins, boris.ostrovsky, konrad.wilk,
ankur.a.arora
smp_cond_load_relaxed(), in its generic polling variant, polls on
the loop condition waiting for it to change, eventually exiting the
loop if the time limit has been exceeded.
To limit the frequency of the relatively expensive time check it is
limited to once every POLL_IDLE_RELAX_COUNT iterations.
arm64, however, uses an event-based mechanism where, instead of
polling, we wait for a store to a region.
Limit the POLL_IDLE_RELAX_COUNT to 1 for that case.
Suggested-by: Haris Okanovic <harisokn@amazon.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
drivers/cpuidle/poll_state.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 532e4ed19e0f..b69fe7b67cb4 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -8,7 +8,18 @@
#include <linux/sched/clock.h>
#include <linux/sched/idle.h>
+#ifdef CONFIG_ARM64
+/*
+ * POLL_IDLE_RELAX_COUNT determines how often we check for timeout
+ * while polling for TIF_NEED_RESCHED in thread_info->flags.
+ *
+ * Set this to a low value since arm64, instead of polling, uses an
+ * event-based mechanism.
+ */
+#define POLL_IDLE_RELAX_COUNT 1
+#else
#define POLL_IDLE_RELAX_COUNT 200
+#endif
static int __cpuidle poll_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
--
2.43.5
^ permalink raw reply related [flat|nested] 27+ messages in thread