public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry
@ 2024-12-04 14:08 Patryk Wlazlyn
  2024-12-04 14:08 ` [PATCH v8 1/4] x86/smp: Allow calling mwait_play_dead with an arbitrary hint Patryk Wlazlyn
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Patryk Wlazlyn @ 2024-12-04 14:08 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, linux-pm, rafael.j.wysocki, peterz, dave.hansen,
	gautham.shenoy, tglx, len.brown, artem.bityutskiy, patryk.wlazlyn

Changes since v7:
  * Doing the s/mwait_play_dead/mwait_play_dead_cpuid_hint/
          and s/mwait_play_dead_with_hint/mwait_play_dead/

    as suggested by Gautham. It was a left-over from previous patches.
    The function(s) got renamed in the patches and I forgot to update
    the changelog accordingly for patch 4/4.

Patryk Wlazlyn (4):
  x86/smp: Allow calling mwait_play_dead with an arbitrary hint
  ACPI: processor_idle: Add FFH state handling
  intel_idle: Provide the default enter_dead() handler
  x86/smp native_play_dead: Prefer cpuidle_play_dead() over
    mwait_play_dead()

 arch/x86/include/asm/smp.h    |  3 +++
 arch/x86/kernel/acpi/cstate.c | 10 ++++++++
 arch/x86/kernel/smpboot.c     | 46 ++++-------------------------------
 drivers/acpi/processor_idle.c |  2 ++
 drivers/idle/intel_idle.c     | 18 ++++++++++++--
 include/acpi/processor.h      |  5 ++++
 6 files changed, 41 insertions(+), 43 deletions(-)

-- 
2.47.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH v8 1/4] x86/smp: Allow calling mwait_play_dead with an arbitrary hint
  2024-12-04 14:08 [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry Patryk Wlazlyn
@ 2024-12-04 14:08 ` Patryk Wlazlyn
  2024-12-04 14:08 ` [PATCH v8 2/4] ACPI: processor_idle: Add FFH state handling Patryk Wlazlyn
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Patryk Wlazlyn @ 2024-12-04 14:08 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, linux-pm, rafael.j.wysocki, peterz, dave.hansen,
	gautham.shenoy, tglx, len.brown, artem.bityutskiy, patryk.wlazlyn

Introduce a helper function to allow offlined CPUs to enter FFh idle
states with a specific MWAIT hint. The new helper will be used in
subsequent patches by the acpi_idle and intel_idle drivers.

No functional change intended.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
---
 arch/x86/include/asm/smp.h |  3 ++
 arch/x86/kernel/smpboot.c  | 90 ++++++++++++++++++++------------------
 2 files changed, 51 insertions(+), 42 deletions(-)

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index ca073f40698f..dfd09a1e09bf 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -114,6 +114,7 @@ void wbinvd_on_cpu(int cpu);
 int wbinvd_on_all_cpus(void);
 
 void smp_kick_mwait_play_dead(void);
+void mwait_play_dead(unsigned int hint);
 
 void native_smp_send_reschedule(int cpu);
 void native_send_call_func_ipi(const struct cpumask *mask);
@@ -164,6 +165,8 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
 {
 	return (struct cpumask *)cpumask_of(0);
 }
+
+static inline void mwait_play_dead(unsigned int eax_hint) { }
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_DEBUG_NMI_SELFTEST
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b5a8f0891135..8a3545c2cae9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1272,13 +1272,57 @@ void play_dead_common(void)
 	local_irq_disable();
 }
 
+void __noreturn mwait_play_dead(unsigned int eax_hint)
+{
+	struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
+
+	/* Set up state for the kexec() hack below */
+	md->status = CPUDEAD_MWAIT_WAIT;
+	md->control = CPUDEAD_MWAIT_WAIT;
+
+	wbinvd();
+
+	while (1) {
+		/*
+		 * The CLFLUSH is a workaround for erratum AAI65 for
+		 * the Xeon 7400 series.  It's not clear it is actually
+		 * needed, but it should be harmless in either case.
+		 * The WBINVD is insufficient due to the spurious-wakeup
+		 * case where we return around the loop.
+		 */
+		mb();
+		clflush(md);
+		mb();
+		__monitor(md, 0, 0);
+		mb();
+		__mwait(eax_hint, 0);
+
+		if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
+			/*
+			 * Kexec is about to happen. Don't go back into mwait() as
+			 * the kexec kernel might overwrite text and data including
+			 * page tables and stack. So mwait() would resume when the
+			 * monitor cache line is written to and then the CPU goes
+			 * south due to overwritten text, page tables and stack.
+			 *
+			 * Note: This does _NOT_ protect against a stray MCE, NMI,
+			 * SMI. They will resume execution at the instruction
+			 * following the HLT instruction and run into the problem
+			 * which this is trying to prevent.
+			 */
+			WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT);
+			while(1)
+				native_halt();
+		}
+	}
+}
+
 /*
  * We need to flush the caches before going to sleep, lest we have
  * dirty data in our caches when we come back up.
  */
-static inline void mwait_play_dead(void)
+static inline void mwait_play_dead_cpuid_hint(void)
 {
-	struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
 	unsigned int eax, ebx, ecx, edx;
 	unsigned int highest_cstate = 0;
 	unsigned int highest_subcstate = 0;
@@ -1316,45 +1360,7 @@ static inline void mwait_play_dead(void)
 			(highest_subcstate - 1);
 	}
 
-	/* Set up state for the kexec() hack below */
-	md->status = CPUDEAD_MWAIT_WAIT;
-	md->control = CPUDEAD_MWAIT_WAIT;
-
-	wbinvd();
-
-	while (1) {
-		/*
-		 * The CLFLUSH is a workaround for erratum AAI65 for
-		 * the Xeon 7400 series.  It's not clear it is actually
-		 * needed, but it should be harmless in either case.
-		 * The WBINVD is insufficient due to the spurious-wakeup
-		 * case where we return around the loop.
-		 */
-		mb();
-		clflush(md);
-		mb();
-		__monitor(md, 0, 0);
-		mb();
-		__mwait(eax, 0);
-
-		if (READ_ONCE(md->control) == CPUDEAD_MWAIT_KEXEC_HLT) {
-			/*
-			 * Kexec is about to happen. Don't go back into mwait() as
-			 * the kexec kernel might overwrite text and data including
-			 * page tables and stack. So mwait() would resume when the
-			 * monitor cache line is written to and then the CPU goes
-			 * south due to overwritten text, page tables and stack.
-			 *
-			 * Note: This does _NOT_ protect against a stray MCE, NMI,
-			 * SMI. They will resume execution at the instruction
-			 * following the HLT instruction and run into the problem
-			 * which this is trying to prevent.
-			 */
-			WRITE_ONCE(md->status, CPUDEAD_MWAIT_KEXEC_HLT);
-			while(1)
-				native_halt();
-		}
-	}
+	mwait_play_dead(eax);
 }
 
 /*
@@ -1407,7 +1413,7 @@ void native_play_dead(void)
 	play_dead_common();
 	tboot_shutdown(TB_SHUTDOWN_WFS);
 
-	mwait_play_dead();
+	mwait_play_dead_cpuid_hint();
 	if (cpuidle_play_dead())
 		hlt_play_dead();
 }
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v8 2/4] ACPI: processor_idle: Add FFH state handling
  2024-12-04 14:08 [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry Patryk Wlazlyn
  2024-12-04 14:08 ` [PATCH v8 1/4] x86/smp: Allow calling mwait_play_dead with an arbitrary hint Patryk Wlazlyn
@ 2024-12-04 14:08 ` Patryk Wlazlyn
  2024-12-04 14:08 ` [PATCH v8 3/4] intel_idle: Provide the default enter_dead() handler Patryk Wlazlyn
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 11+ messages in thread
From: Patryk Wlazlyn @ 2024-12-04 14:08 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, linux-pm, rafael.j.wysocki, peterz, dave.hansen,
	gautham.shenoy, tglx, len.brown, artem.bityutskiy, patryk.wlazlyn

Recent Intel platforms will depend on the idle driver to pass the
correct hint for playing dead via mwait_play_dead_with_hint(). Expand
the existing enter_dead interface with handling for FFH states and pass
the MWAIT hint to the mwait_play_dead code.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Suggested-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
---
 arch/x86/kernel/acpi/cstate.c | 10 ++++++++++
 drivers/acpi/processor_idle.c |  2 ++
 include/acpi/processor.h      |  5 +++++
 3 files changed, 17 insertions(+)

diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index f3ffd0a3a012..8d7b8b02ddb9 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -15,6 +15,7 @@
 #include <acpi/processor.h>
 #include <asm/mwait.h>
 #include <asm/special_insns.h>
+#include <asm/smp.h>
 
 /*
  * Initialize bm_flags based on the CPU cache properties
@@ -204,6 +205,15 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 }
 EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
 
+void acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx)
+{
+	unsigned int cpu = smp_processor_id();
+	struct cstate_entry *percpu_entry;
+
+	percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
+	mwait_play_dead(percpu_entry->states[cx->index].eax);
+}
+
 void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
 {
 	unsigned int cpu = smp_processor_id();
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index ce728cf7e301..83213fa47c1b 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -590,6 +590,8 @@ static void acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 			raw_safe_halt();
 		else if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
 			io_idle(cx->address);
+		} else if (cx->entry_method == ACPI_CSTATE_FFH) {
+			acpi_processor_ffh_play_dead(cx);
 		} else
 			return;
 	}
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index a17e97e634a6..63a37e72b721 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -280,6 +280,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
 				    struct acpi_processor_cx *cx,
 				    struct acpi_power_register *reg);
 void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cstate);
+void acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx);
 #else
 static inline void acpi_processor_power_init_bm_check(struct
 						      acpi_processor_flags
@@ -300,6 +301,10 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
 {
 	return;
 }
+static inline void acpi_processor_ffh_play_dead(struct acpi_processor_cx *cx)
+{
+	return;
+}
 #endif
 
 static inline int call_on_cpu(int cpu, long (*fn)(void *), void *arg,
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v8 3/4] intel_idle: Provide the default enter_dead() handler
  2024-12-04 14:08 [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry Patryk Wlazlyn
  2024-12-04 14:08 ` [PATCH v8 1/4] x86/smp: Allow calling mwait_play_dead with an arbitrary hint Patryk Wlazlyn
  2024-12-04 14:08 ` [PATCH v8 2/4] ACPI: processor_idle: Add FFH state handling Patryk Wlazlyn
@ 2024-12-04 14:08 ` Patryk Wlazlyn
  2024-12-10 20:26   ` Rafael J. Wysocki
  2024-12-16  1:31   ` kernel test robot
  2024-12-04 14:08 ` [PATCH v8 4/4] x86/smp native_play_dead: Prefer cpuidle_play_dead() over mwait_play_dead() Patryk Wlazlyn
  2024-12-04 15:05 ` [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry Dave Hansen
  4 siblings, 2 replies; 11+ messages in thread
From: Patryk Wlazlyn @ 2024-12-04 14:08 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, linux-pm, rafael.j.wysocki, peterz, dave.hansen,
	gautham.shenoy, tglx, len.brown, artem.bityutskiy, patryk.wlazlyn

Recent Intel platforms require idle driver to provide information about
the MWAIT hint used to enter the deepest idle state in the play_dead
code.

Provide the default enter_dead() handler for all of the platforms and
allow overwriting with a custom handler for each platform if needed.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
---
 drivers/idle/intel_idle.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index ac4d8faa3886..c6874a6dbe95 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -56,6 +56,7 @@
 #include <asm/mwait.h>
 #include <asm/spec-ctrl.h>
 #include <asm/fpu/api.h>
+#include <asm/smp.h>
 
 #define INTEL_IDLE_VERSION "0.5.1"
 
@@ -227,6 +228,16 @@ static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
 	return 0;
 }
 
+static __cpuidle void intel_idle_enter_dead(struct cpuidle_device *dev,
+					    int index)
+{
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+	struct cpuidle_state *state = &drv->states[index];
+	unsigned long eax = flg2MWAIT(state->flags);
+
+	mwait_play_dead(eax);
+}
+
 /*
  * States are indexed by the cstate number,
  * which is also the index into the MWAIT hint array.
@@ -1798,6 +1809,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
 			state->flags |= CPUIDLE_FLAG_TIMER_STOP;
 
 		state->enter = intel_idle;
+		state->enter_dead = intel_idle_enter_dead;
 		state->enter_s2idle = intel_idle_s2idle;
 	}
 }
@@ -2143,10 +2155,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 		if (intel_idle_max_cstate_reached(cstate))
 			break;
 
-		if (!cpuidle_state_table[cstate].enter &&
-		    !cpuidle_state_table[cstate].enter_s2idle)
+		if (!cpuidle_state_table[cstate].enter)
 			break;
 
+		if (!cpuidle_state_table[cstate].enter_dead)
+			cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead;
+
 		/* If marked as unusable, skip this state. */
 		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
 			pr_debug("state %s is disabled\n",
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH v8 4/4] x86/smp native_play_dead: Prefer cpuidle_play_dead() over mwait_play_dead()
  2024-12-04 14:08 [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry Patryk Wlazlyn
                   ` (2 preceding siblings ...)
  2024-12-04 14:08 ` [PATCH v8 3/4] intel_idle: Provide the default enter_dead() handler Patryk Wlazlyn
@ 2024-12-04 14:08 ` Patryk Wlazlyn
  2024-12-10 20:50   ` Rafael J. Wysocki
  2024-12-04 15:05 ` [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry Dave Hansen
  4 siblings, 1 reply; 11+ messages in thread
From: Patryk Wlazlyn @ 2024-12-04 14:08 UTC (permalink / raw)
  To: x86
  Cc: linux-kernel, linux-pm, rafael.j.wysocki, peterz, dave.hansen,
	gautham.shenoy, tglx, len.brown, artem.bityutskiy, patryk.wlazlyn

The current algorithm* for looking up the mwait hint for the deepest
cstate, in mwait_play_dead_cpuid_hint() code works by inspecting CPUID
leaf 0x5 and calculates the mwait hint based on the number of reported
substates.  This approach depends on the hints associated with them to
be continuous in the range [0, NUM_SUBSTATES-1]. This continuity is not
documented and is not met on the recent Intel platforms.

 * The current algorithm is implemented in the for loop inspecting edx
   in mwait_play_dead_cpuid_hint().

For example, Intel's Sierra Forest report two cstates with two substates
each in cpuid leaf 0x5:

  Name*   target cstate    target subcstate (mwait hint)
  ===========================================================
  C1      0x00             0x00
  C1E     0x00             0x01

  --      0x10             ----

  C6S     0x20             0x22
  C6P     0x20             0x23

  --      0x30             ----

  /* No more (sub)states all the way down to the end. */
  ===========================================================

   * Names of the cstates are not included in the CPUID leaf 0x5, they are
     taken from the product specific documentation.

Notice that hints 0x20 and 0x21 are skipped entirely for the target
cstate 0x20 (C6), being a cause of the problem for the current cpuid
leaf 0x5 algorithm.

Remove the old implementation of play_dead MWAIT hint calculation based
on the CPUID leaf 0x5 in mwait_play_dead_cpuid_hint() and delegate
calling of the mwait_play_dead() to the idle driver.

Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
---
 arch/x86/kernel/smpboot.c | 56 +++++----------------------------------
 1 file changed, 7 insertions(+), 49 deletions(-)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8a3545c2cae9..82801137486d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1272,6 +1272,10 @@ void play_dead_common(void)
 	local_irq_disable();
 }
 
+/*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
 void __noreturn mwait_play_dead(unsigned int eax_hint)
 {
 	struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
@@ -1317,52 +1321,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint)
 	}
 }
 
-/*
- * We need to flush the caches before going to sleep, lest we have
- * dirty data in our caches when we come back up.
- */
-static inline void mwait_play_dead_cpuid_hint(void)
-{
-	unsigned int eax, ebx, ecx, edx;
-	unsigned int highest_cstate = 0;
-	unsigned int highest_subcstate = 0;
-	int i;
-
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
-	    boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
-		return;
-	if (!this_cpu_has(X86_FEATURE_MWAIT))
-		return;
-	if (!this_cpu_has(X86_FEATURE_CLFLUSH))
-		return;
-	if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
-		return;
-
-	eax = CPUID_MWAIT_LEAF;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-
-	/*
-	 * eax will be 0 if EDX enumeration is not valid.
-	 * Initialized below to cstate, sub_cstate value when EDX is valid.
-	 */
-	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
-		eax = 0;
-	} else {
-		edx >>= MWAIT_SUBSTATE_SIZE;
-		for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
-			if (edx & MWAIT_SUBSTATE_MASK) {
-				highest_cstate = i;
-				highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
-			}
-		}
-		eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
-			(highest_subcstate - 1);
-	}
-
-	mwait_play_dead(eax);
-}
-
 /*
  * Kick all "offline" CPUs out of mwait on kexec(). See comment in
  * mwait_play_dead().
@@ -1413,9 +1371,9 @@ void native_play_dead(void)
 	play_dead_common();
 	tboot_shutdown(TB_SHUTDOWN_WFS);
 
-	mwait_play_dead_cpuid_hint();
-	if (cpuidle_play_dead())
-		hlt_play_dead();
+	/* Below returns only on error. */
+	cpuidle_play_dead();
+	hlt_play_dead();
 }
 
 #else /* ... !CONFIG_HOTPLUG_CPU */
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry
  2024-12-04 14:08 [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry Patryk Wlazlyn
                   ` (3 preceding siblings ...)
  2024-12-04 14:08 ` [PATCH v8 4/4] x86/smp native_play_dead: Prefer cpuidle_play_dead() over mwait_play_dead() Patryk Wlazlyn
@ 2024-12-04 15:05 ` Dave Hansen
  2024-12-04 15:35   ` Patryk Wlazlyn
  4 siblings, 1 reply; 11+ messages in thread
From: Dave Hansen @ 2024-12-04 15:05 UTC (permalink / raw)
  To: Patryk Wlazlyn, x86
  Cc: linux-kernel, linux-pm, rafael.j.wysocki, peterz, dave.hansen,
	gautham.shenoy, tglx, len.brown, artem.bityutskiy

On 12/4/24 06:08, Patryk Wlazlyn wrote:
> Changes since v7:
>   * Doing the s/mwait_play_dead/mwait_play_dead_cpuid_hint/
>           and s/mwait_play_dead_with_hint/mwait_play_dead/
> 
>     as suggested by Gautham. It was a left-over from previous patches.
>     The function(s) got renamed in the patches and I forgot to update
>     the changelog accordingly for patch 4/4.
> 
> Patryk Wlazlyn (4):
>   x86/smp: Allow calling mwait_play_dead with an arbitrary hint
>   ACPI: processor_idle: Add FFH state handling
>   intel_idle: Provide the default enter_dead() handler
>   x86/smp native_play_dead: Prefer cpuidle_play_dead() over
>     mwait_play_dead()
> 
>  arch/x86/include/asm/smp.h    |  3 +++
>  arch/x86/kernel/acpi/cstate.c | 10 ++++++++
>  arch/x86/kernel/smpboot.c     | 46 ++++-------------------------------
>  drivers/acpi/processor_idle.c |  2 ++
>  drivers/idle/intel_idle.c     | 18 ++++++++++++--
>  include/acpi/processor.h      |  5 ++++
>  6 files changed, 41 insertions(+), 43 deletions(-)

Hey Patryk,

I know this series is up to v8 and we all know what it's about. But it
would be much appreciated in the future if you could send along a cover
letter. It's important and really does help reviewers dive into a series
more efficiently.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry
  2024-12-04 15:05 ` [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry Dave Hansen
@ 2024-12-04 15:35   ` Patryk Wlazlyn
  0 siblings, 0 replies; 11+ messages in thread
From: Patryk Wlazlyn @ 2024-12-04 15:35 UTC (permalink / raw)
  To: Dave Hansen, x86
  Cc: linux-kernel, linux-pm, rafael.j.wysocki, peterz, dave.hansen,
	gautham.shenoy, tglx, len.brown, artem.bityutskiy

> Hey Patryk,
>
> I know this series is up to v8 and we all know what it's about. But it
> would be much appreciated in the future if you could send along a cover
> letter. It's important and really does help reviewers dive into a series
> more efficiently.

ACK. I'll keep that in mind for the next time. Thanks.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8 3/4] intel_idle: Provide the default enter_dead() handler
  2024-12-04 14:08 ` [PATCH v8 3/4] intel_idle: Provide the default enter_dead() handler Patryk Wlazlyn
@ 2024-12-10 20:26   ` Rafael J. Wysocki
  2024-12-17 12:42     ` Patryk Wlazlyn
  2024-12-16  1:31   ` kernel test robot
  1 sibling, 1 reply; 11+ messages in thread
From: Rafael J. Wysocki @ 2024-12-10 20:26 UTC (permalink / raw)
  To: Patryk Wlazlyn
  Cc: x86, linux-kernel, linux-pm, rafael.j.wysocki, peterz,
	dave.hansen, gautham.shenoy, tglx, len.brown, artem.bityutskiy

On Wed, Dec 4, 2024 at 3:08 PM Patryk Wlazlyn
<patryk.wlazlyn@linux.intel.com> wrote:
>
> Recent Intel platforms require idle driver to provide information about
> the MWAIT hint used to enter the deepest idle state in the play_dead
> code.
>
> Provide the default enter_dead() handler for all of the platforms and
> allow overwriting with a custom handler for each platform if needed.

My changelog for this patch:

"A subsequent change is going to make native_play_dead() rely on the
idle driver to put CPUs going offline into appropriate idle states.

For this reason, provide the default :enter_dead() handler for all of
the idle states on all platforms supported by intel_idle with an
option to override it with a custom handler if needed."

> Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
> ---
>  drivers/idle/intel_idle.c | 18 ++++++++++++++++--
>  1 file changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
> index ac4d8faa3886..c6874a6dbe95 100644
> --- a/drivers/idle/intel_idle.c
> +++ b/drivers/idle/intel_idle.c
> @@ -56,6 +56,7 @@
>  #include <asm/mwait.h>
>  #include <asm/spec-ctrl.h>
>  #include <asm/fpu/api.h>
> +#include <asm/smp.h>
>
>  #define INTEL_IDLE_VERSION "0.5.1"
>
> @@ -227,6 +228,16 @@ static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
>         return 0;
>  }
>
> +static __cpuidle void intel_idle_enter_dead(struct cpuidle_device *dev,
> +                                           int index)
> +{
> +       struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
> +       struct cpuidle_state *state = &drv->states[index];
> +       unsigned long eax = flg2MWAIT(state->flags);
> +
> +       mwait_play_dead(eax);
> +}
> +
>  /*
>   * States are indexed by the cstate number,
>   * which is also the index into the MWAIT hint array.
> @@ -1798,6 +1809,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
>                         state->flags |= CPUIDLE_FLAG_TIMER_STOP;
>
>                 state->enter = intel_idle;
> +               state->enter_dead = intel_idle_enter_dead;
>                 state->enter_s2idle = intel_idle_s2idle;
>         }
>  }
> @@ -2143,10 +2155,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
>                 if (intel_idle_max_cstate_reached(cstate))
>                         break;
>
> -               if (!cpuidle_state_table[cstate].enter &&
> -                   !cpuidle_state_table[cstate].enter_s2idle)
> +               if (!cpuidle_state_table[cstate].enter)

I don't think that the above change belongs to this patch.  If I'm
mistaken, it should be mentioned in the changelog and the reason for
making it should be explained.

>                         break;
>
> +               if (!cpuidle_state_table[cstate].enter_dead)
> +                       cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead;
> +
>                 /* If marked as unusable, skip this state. */
>                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
>                         pr_debug("state %s is disabled\n",
> --

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8 4/4] x86/smp native_play_dead: Prefer cpuidle_play_dead() over mwait_play_dead()
  2024-12-04 14:08 ` [PATCH v8 4/4] x86/smp native_play_dead: Prefer cpuidle_play_dead() over mwait_play_dead() Patryk Wlazlyn
@ 2024-12-10 20:50   ` Rafael J. Wysocki
  0 siblings, 0 replies; 11+ messages in thread
From: Rafael J. Wysocki @ 2024-12-10 20:50 UTC (permalink / raw)
  To: Patryk Wlazlyn
  Cc: x86, linux-kernel, linux-pm, rafael.j.wysocki, peterz,
	dave.hansen, gautham.shenoy, tglx, len.brown, artem.bityutskiy

First off, I'd change the subject to something like "x86/smp:
Eliminate mwait_play_dead_cpuid_hint()" because that's what the patch
is doing.

On Wed, Dec 4, 2024 at 3:08 PM Patryk Wlazlyn
<patryk.wlazlyn@linux.intel.com> wrote:
>
> The current algorithm* for looking up the mwait hint for the deepest
> cstate, in mwait_play_dead_cpuid_hint() code works by inspecting CPUID
> leaf 0x5 and calculates the mwait hint based on the number of reported
> substates.

I would just say

"Currently, mwait_play_dead_cpuid_hint() looks up the MWAIT hint of
the deepest idle state by inspecting CPUID leaf 0x5 with the
assumption that, if the number of sub-states for a given major C-state
is nonzero, those sub-states are always represented by consecutive
numbers starting from 0."

>  This approach depends on the hints associated with them to
> be continuous in the range [0, NUM_SUBSTATES-1]. This continuity is not
> documented and is not met on the recent Intel platforms.

And then

"This assumption is not based on the documented platform behavior and
in fact it is not met on recent Intel platforms."

>
>  * The current algorithm is implemented in the for loop inspecting edx
>    in mwait_play_dead_cpuid_hint().

The above sentence does not add any value IMV.

> For example, Intel's Sierra Forest report two cstates with two substates
> each in cpuid leaf 0x5:

It's "Intel Sierra Forest" and if you said C-states above, please be
consistent and say C-states here too.

>
>   Name*   target cstate    target subcstate (mwait hint)
>   ===========================================================
>   C1      0x00             0x00
>   C1E     0x00             0x01
>
>   --      0x10             ----
>
>   C6S     0x20             0x22
>   C6P     0x20             0x23
>
>   --      0x30             ----
>
>   /* No more (sub)states all the way down to the end. */
>   ===========================================================
>
>    * Names of the cstates are not included in the CPUID leaf 0x5, they are
>      taken from the product specific documentation.
>
> Notice that hints 0x20 and 0x21 are skipped entirely for the target
> cstate 0x20 (C6), being a cause of the problem for the current cpuid
> leaf 0x5 algorithm.

And here

"Notice that hints 0x20 and 0x21 are not defined for C-state 0x20
(C6), so the existing MWAIT hint lookup in
mwait_play_dead_cpuid_hint() based on the CPUID leaf 0x5 contents does
not work in this case."

> Remove the old implementation of play_dead MWAIT hint calculation based
> on the CPUID leaf 0x5 in mwait_play_dead_cpuid_hint() and delegate
> calling of the mwait_play_dead() to the idle driver.

Well, the above is not exactly what's going on.

I'd say something like

"Instead of using MWAIT hint lookup that is not guaranteed to work,
make native_play_dead() rely on the idle driver for the given platform
to put CPUs going offline into appropriate idle state and, if that
fails, fall back to hlt_play_dead().

Accordingly, drop mwait_play_dead_cpuid_hint() altogether and make
native_play_dead() call cpuidle_play_dead() instead of it
unconditionally with the assumption that it will not return if it is
successful.  Still, in case cpuidle_play_dead() fails, call
hlt_play_dead() at the end."

> Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
> Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
> ---
>  arch/x86/kernel/smpboot.c | 56 +++++----------------------------------
>  1 file changed, 7 insertions(+), 49 deletions(-)
>
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 8a3545c2cae9..82801137486d 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -1272,6 +1272,10 @@ void play_dead_common(void)
>         local_irq_disable();
>  }
>
> +/*
> + * We need to flush the caches before going to sleep, lest we have
> + * dirty data in our caches when we come back up.
> + */
>  void __noreturn mwait_play_dead(unsigned int eax_hint)
>  {
>         struct mwait_cpu_dead *md = this_cpu_ptr(&mwait_cpu_dead);
> @@ -1317,52 +1321,6 @@ void __noreturn mwait_play_dead(unsigned int eax_hint)
>         }
>  }
>
> -/*
> - * We need to flush the caches before going to sleep, lest we have
> - * dirty data in our caches when we come back up.
> - */
> -static inline void mwait_play_dead_cpuid_hint(void)
> -{
> -       unsigned int eax, ebx, ecx, edx;
> -       unsigned int highest_cstate = 0;
> -       unsigned int highest_subcstate = 0;
> -       int i;
> -
> -       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
> -           boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
> -               return;
> -       if (!this_cpu_has(X86_FEATURE_MWAIT))
> -               return;
> -       if (!this_cpu_has(X86_FEATURE_CLFLUSH))
> -               return;
> -       if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
> -               return;
> -
> -       eax = CPUID_MWAIT_LEAF;
> -       ecx = 0;
> -       native_cpuid(&eax, &ebx, &ecx, &edx);
> -
> -       /*
> -        * eax will be 0 if EDX enumeration is not valid.
> -        * Initialized below to cstate, sub_cstate value when EDX is valid.
> -        */
> -       if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
> -               eax = 0;
> -       } else {
> -               edx >>= MWAIT_SUBSTATE_SIZE;
> -               for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
> -                       if (edx & MWAIT_SUBSTATE_MASK) {
> -                               highest_cstate = i;
> -                               highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
> -                       }
> -               }
> -               eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
> -                       (highest_subcstate - 1);
> -       }
> -
> -       mwait_play_dead(eax);
> -}
> -
>  /*
>   * Kick all "offline" CPUs out of mwait on kexec(). See comment in
>   * mwait_play_dead().
> @@ -1413,9 +1371,9 @@ void native_play_dead(void)
>         play_dead_common();
>         tboot_shutdown(TB_SHUTDOWN_WFS);
>
> -       mwait_play_dead_cpuid_hint();
> -       if (cpuidle_play_dead())
> -               hlt_play_dead();
> +       /* Below returns only on error. */
> +       cpuidle_play_dead();
> +       hlt_play_dead();
>  }
>
>  #else /* ... !CONFIG_HOTPLUG_CPU */
> --

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8 3/4] intel_idle: Provide the default enter_dead() handler
  2024-12-04 14:08 ` [PATCH v8 3/4] intel_idle: Provide the default enter_dead() handler Patryk Wlazlyn
  2024-12-10 20:26   ` Rafael J. Wysocki
@ 2024-12-16  1:31   ` kernel test robot
  1 sibling, 0 replies; 11+ messages in thread
From: kernel test robot @ 2024-12-16  1:31 UTC (permalink / raw)
  To: Patryk Wlazlyn, x86
  Cc: oe-kbuild-all, linux-kernel, linux-pm, rafael.j.wysocki, peterz,
	dave.hansen, gautham.shenoy, tglx, len.brown, artem.bityutskiy,
	patryk.wlazlyn

Hi Patryk,

kernel test robot noticed the following build warnings:

[auto build test WARNING on rafael-pm/linux-next]
[also build test WARNING on rafael-pm/bleeding-edge linus/master v6.13-rc2 next-20241213]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Patryk-Wlazlyn/x86-smp-Allow-calling-mwait_play_dead-with-an-arbitrary-hint/20241204-221157
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/20241204140828.11699-4-patryk.wlazlyn%40linux.intel.com
patch subject: [PATCH v8 3/4] intel_idle: Provide the default enter_dead() handler
config: x86_64-randconfig-071-20241216 (https://download.01.org/0day-ci/archive/20241216/202412160941.mXBc9usk-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241216/202412160941.mXBc9usk-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202412160941.mXBc9usk-lkp@intel.com/

All warnings (new ones prefixed by >>):

   vmlinux.o: warning: objtool: acpi_processor_ffh_play_dead+0xbe: mwait_play_dead() is missing a __noreturn annotation
>> vmlinux.o: warning: objtool: intel_idle_enter_dead+0x29: mwait_play_dead() is missing a __noreturn annotation

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v8 3/4] intel_idle: Provide the default enter_dead() handler
  2024-12-10 20:26   ` Rafael J. Wysocki
@ 2024-12-17 12:42     ` Patryk Wlazlyn
  0 siblings, 0 replies; 11+ messages in thread
From: Patryk Wlazlyn @ 2024-12-17 12:42 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: x86, linux-kernel, linux-pm, rafael.j.wysocki, peterz,
	dave.hansen, gautham.shenoy, tglx, len.brown, artem.bityutskiy

>> Recent Intel platforms require idle driver to provide information about
>> the MWAIT hint used to enter the deepest idle state in the play_dead
>> code.
>>
>> Provide the default enter_dead() handler for all of the platforms and
>> allow overwriting with a custom handler for each platform if needed.
>
> My changelog for this patch:
>
> "A subsequent change is going to make native_play_dead() rely on the
> idle driver to put CPUs going offline into appropriate idle states.
>
> For this reason, provide the default :enter_dead() handler for all of
> the idle states on all platforms supported by intel_idle with an
> option to override it with a custom handler if needed."
>

Ok. I'll apply that in the next version.

>> Signed-off-by: Patryk Wlazlyn <patryk.wlazlyn@linux.intel.com>
>> ---
>>  drivers/idle/intel_idle.c | 18 ++++++++++++++++--
>>  1 file changed, 16 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
>> index ac4d8faa3886..c6874a6dbe95 100644
>> --- a/drivers/idle/intel_idle.c
>> +++ b/drivers/idle/intel_idle.c
>> @@ -56,6 +56,7 @@
>>  #include <asm/mwait.h>
>>  #include <asm/spec-ctrl.h>
>>  #include <asm/fpu/api.h>
>> +#include <asm/smp.h>
>>
>>  #define INTEL_IDLE_VERSION "0.5.1"
>>
>> @@ -227,6 +228,16 @@ static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev,
>>         return 0;
>>  }
>>
>> +static __cpuidle void intel_idle_enter_dead(struct cpuidle_device *dev,
>> +                                           int index)
>> +{
>> +       struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
>> +       struct cpuidle_state *state = &drv->states[index];
>> +       unsigned long eax = flg2MWAIT(state->flags);
>> +
>> +       mwait_play_dead(eax);
>> +}
>> +
>>  /*
>>   * States are indexed by the cstate number,
>>   * which is also the index into the MWAIT hint array.
>> @@ -1798,6 +1809,7 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv)
>>                         state->flags |= CPUIDLE_FLAG_TIMER_STOP;
>>
>>                 state->enter = intel_idle;
>> +               state->enter_dead = intel_idle_enter_dead;
>>                 state->enter_s2idle = intel_idle_s2idle;
>>         }
>>  }
>> @@ -2143,10 +2155,12 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
>>                 if (intel_idle_max_cstate_reached(cstate))
>>                         break;
>>
>> -               if (!cpuidle_state_table[cstate].enter &&
>> -                   !cpuidle_state_table[cstate].enter_s2idle)
>> +               if (!cpuidle_state_table[cstate].enter)
>
> I don't think that the above change belongs to this patch.  If I'm
> mistaken, it should be mentioned in the changelog and the reason for
> making it should be explained.

Yeah, you are right, removing enter_s2idle check doesn't make much sense.
I think I was changing much more code, but eventually decided not to
change too much in one go and forgot to roll back that one.

With this:
                if (!cpuidle_state_table[cstate].enter &&
                    !cpuidle_state_table[cstate].enter_s2idle)

we would not set enter_dead for states that only provide enter_dead handler, but I doesn't seem to happen in practice, so for the sake of simplicity I am just going to leave the check unchanged.
Unless you think it makes more sense to do:
                if (!cpuidle_state_table[cstate].enter &&
+                   !cpuidle_state_table[cstate].enter_dead &&
                    !cpuidle_state_table[cstate].enter_s2idle)

>
>>                         break;
>>
>> +               if (!cpuidle_state_table[cstate].enter_dead)
>> +                       cpuidle_state_table[cstate].enter_dead = intel_idle_enter_dead;
>> +
>>                 /* If marked as unusable, skip this state. */
>>                 if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
>>                         pr_debug("state %s is disabled\n",
>> --


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-12-17 12:42 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-04 14:08 [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry Patryk Wlazlyn
2024-12-04 14:08 ` [PATCH v8 1/4] x86/smp: Allow calling mwait_play_dead with an arbitrary hint Patryk Wlazlyn
2024-12-04 14:08 ` [PATCH v8 2/4] ACPI: processor_idle: Add FFH state handling Patryk Wlazlyn
2024-12-04 14:08 ` [PATCH v8 3/4] intel_idle: Provide the default enter_dead() handler Patryk Wlazlyn
2024-12-10 20:26   ` Rafael J. Wysocki
2024-12-17 12:42     ` Patryk Wlazlyn
2024-12-16  1:31   ` kernel test robot
2024-12-04 14:08 ` [PATCH v8 4/4] x86/smp native_play_dead: Prefer cpuidle_play_dead() over mwait_play_dead() Patryk Wlazlyn
2024-12-10 20:50   ` Rafael J. Wysocki
2024-12-04 15:05 ` [PATCH v8 0/4] SRF: Fix offline CPU preventing pc6 entry Dave Hansen
2024-12-04 15:35   ` Patryk Wlazlyn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox