[PATCH v4 15/16] x86: do MTRR/PAT setup on all secondary CPUs in parallel

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Juergen Gross <jgross@suse.com>
To: linux-kernel@vger.kernel.org, x86@kernel.org
Cc: Juergen Gross <jgross@suse.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: [PATCH v4 15/16] x86: do MTRR/PAT setup on all secondary CPUs in parallel
Date: Tue,  4 Oct 2022 10:10:22 +0200	[thread overview]
Message-ID: <20221004081023.32402-16-jgross@suse.com> (raw)
In-Reply-To: <20221004081023.32402-1-jgross@suse.com>

Instead of serializing MTRR/PAT setup on the secondary CPUs in order
to avoid clobbering of static variables used by the setup process, put
those variables into a structure held on the stack and drop the
serialization.

This speeds up the start of secondary cpus a little bit (on a small
system with 8 CPUs the time needed for starting the secondary CPUs was
measured to go down from about 60 milliseconds without this patch to
about 55 milliseconds with this patch applied).

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V4:
- new patch
---
 arch/x86/include/asm/cacheinfo.h   | 10 ++++++--
 arch/x86/include/asm/mtrr.h        | 13 +++++-----
 arch/x86/kernel/cpu/cacheinfo.c    | 28 ++++++++-------------
 arch/x86/kernel/cpu/mtrr/generic.c | 40 ++++++++++++++----------------
 4 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h
index f6c521687535..77ddc04c4975 100644
--- a/arch/x86/include/asm/cacheinfo.h
+++ b/arch/x86/include/asm/cacheinfo.h
@@ -10,8 +10,14 @@ extern unsigned int memory_caching_control;
 void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu);
 void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu);
 
-void cache_disable(void);
-void cache_enable(void);
+struct cache_state {
+	unsigned long cr4;
+	u32 mtrr_deftype_lo;
+	u32 mtrr_deftype_hi;
+};
+
+void cache_disable(struct cache_state *state);
+void cache_enable(struct cache_state *state);
 void set_cache_aps_delayed_init(void);
 void cache_bp_init(void);
 void cache_bp_restore(void);
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index ec73d1e5bafb..67c16c813259 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -23,6 +23,7 @@
 #ifndef _ASM_X86_MTRR_H
 #define _ASM_X86_MTRR_H
 
+#include <asm/cacheinfo.h>
 #include <uapi/asm/mtrr.h>
 
 void mtrr_bp_init(void);
@@ -45,9 +46,9 @@ extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
 extern void mtrr_bp_restore(void);
 extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
 extern int amd_special_default_mtrr(void);
-void mtrr_disable(void);
-void mtrr_enable(void);
-void mtrr_generic_set_state(void);
+void mtrr_disable(struct cache_state *state);
+void mtrr_enable(struct cache_state *state);
+void mtrr_generic_set_state(struct cache_state *state);
 #  else
 static inline u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform)
 {
@@ -84,9 +85,9 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
 {
 }
 #define mtrr_bp_restore() do {} while (0)
-#define mtrr_disable() do {} while (0)
-#define mtrr_enable() do {} while (0)
-#define mtrr_generic_set_state() do {} while (0)
+#define mtrr_disable(s) do {} while (0)
+#define mtrr_enable(s) do {} while (0)
+#define mtrr_generic_set_state(s) do {} while (0)
 #  endif
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 48ce48827f87..84684b50a5ce 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -1057,10 +1057,7 @@ int populate_cache_leaves(unsigned int cpu)
  * The caller must ensure that local interrupts are disabled and
  * are reenabled after cache_enable() has been called.
  */
-static unsigned long saved_cr4;
-static DEFINE_RAW_SPINLOCK(cache_disable_lock);
-
-void cache_disable(void) __acquires(cache_disable_lock)
+void cache_disable(struct cache_state *state)
 {
 	unsigned long cr0;
 
@@ -1071,8 +1068,6 @@ void cache_disable(void) __acquires(cache_disable_lock)
 	 * changes to the way the kernel boots
 	 */
 
-	raw_spin_lock(&cache_disable_lock);
-
 	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
 	cr0 = read_cr0() | X86_CR0_CD;
 	write_cr0(cr0);
@@ -1088,8 +1083,8 @@ void cache_disable(void) __acquires(cache_disable_lock)
 
 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
 	if (boot_cpu_has(X86_FEATURE_PGE)) {
-		saved_cr4 = __read_cr4();
-		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
+		state->cr4 = __read_cr4();
+		__write_cr4(state->cr4 & ~X86_CR4_PGE);
 	}
 
 	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
@@ -1097,46 +1092,45 @@ void cache_disable(void) __acquires(cache_disable_lock)
 	flush_tlb_local();
 
 	if (boot_cpu_has(X86_FEATURE_MTRR))
-		mtrr_disable();
+		mtrr_disable(state);
 
 	/* Again, only flush caches if we have to. */
 	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
 		wbinvd();
 }
 
-void cache_enable(void) __releases(cache_disable_lock)
+void cache_enable(struct cache_state *state)
 {
 	/* Flush TLBs (no need to flush caches - they are disabled) */
 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 	flush_tlb_local();
 
 	if (boot_cpu_has(X86_FEATURE_MTRR))
-		mtrr_enable();
+		mtrr_enable(state);
 
 	/* Enable caches */
 	write_cr0(read_cr0() & ~X86_CR0_CD);
 
 	/* Restore value of CR4 */
 	if (boot_cpu_has(X86_FEATURE_PGE))
-		__write_cr4(saved_cr4);
-
-	raw_spin_unlock(&cache_disable_lock);
+		__write_cr4(state->cr4);
 }
 
 static void cache_cpu_init(void)
 {
 	unsigned long flags;
+	struct cache_state state;
 
 	local_irq_save(flags);
-	cache_disable();
+	cache_disable(&state);
 
 	if (memory_caching_control & CACHE_MTRR)
-		mtrr_generic_set_state();
+		mtrr_generic_set_state(&state);
 
 	if (memory_caching_control & CACHE_PAT)
 		pat_cpu_init();
 
-	cache_enable();
+	cache_enable(&state);
 	local_irq_restore(flags);
 }
 
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 2f2485d6657f..cddb440f330d 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -663,18 +663,13 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
 	return changed;
 }
 
-static u32 deftype_lo, deftype_hi;
-
 /**
  * set_mtrr_state - Set the MTRR state for this CPU.
  *
- * NOTE: The CPU must already be in a safe state for MTRR changes, including
- *       measures that only a single CPU can be active in set_mtrr_state() in
- *       order to not be subject to races for usage of deftype_lo (this is
- *       accomplished by taking cache_disable_lock).
+ * NOTE: The CPU must already be in a safe state for MTRR changes.
  * RETURNS: 0 if no changes made, else a mask indicating what was changed.
  */
-static unsigned long set_mtrr_state(void)
+static unsigned long set_mtrr_state(struct cache_state *state)
 {
 	unsigned long change_mask = 0;
 	unsigned int i;
@@ -691,38 +686,40 @@ static unsigned long set_mtrr_state(void)
 	 * Set_mtrr_restore restores the old value of MTRRdefType,
 	 * so to set it we fiddle with the saved value:
 	 */
-	if ((deftype_lo & 0xff) != mtrr_state.def_type
-	    || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
-
-		deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type |
-			     (mtrr_state.enabled << 10);
+	if ((state->mtrr_deftype_lo & 0xff) != mtrr_state.def_type
+	    || ((state->mtrr_deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
+		state->mtrr_deftype_lo = (state->mtrr_deftype_lo & ~0xcff) |
+					 mtrr_state.def_type |
+					 (mtrr_state.enabled << 10);
 		change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
 	}
 
 	return change_mask;
 }
 
-void mtrr_disable(void)
+void mtrr_disable(struct cache_state *state)
 {
 	/* Save MTRR state */
-	rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
+	rdmsr(MSR_MTRRdefType, state->mtrr_deftype_lo, state->mtrr_deftype_hi);
 
 	/* Disable MTRRs, and set the default type to uncached */
-	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
+	mtrr_wrmsr(MSR_MTRRdefType, state->mtrr_deftype_lo & ~0xcff,
+		   state->mtrr_deftype_hi);
 }
 
-void mtrr_enable(void)
+void mtrr_enable(struct cache_state *state)
 {
 	/* Intel (P6) standard MTRRs */
-	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
+	mtrr_wrmsr(MSR_MTRRdefType, state->mtrr_deftype_lo,
+		   state->mtrr_deftype_hi);
 }
 
-void mtrr_generic_set_state(void)
+void mtrr_generic_set_state(struct cache_state *state)
 {
 	unsigned long mask, count;
 
 	/* Actually set the state */
-	mask = set_mtrr_state();
+	mask = set_mtrr_state(state);
 
 	/* Use the atomic bitops to update the global mask */
 	for (count = 0; count < sizeof(mask) * 8; ++count) {
@@ -747,11 +744,12 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 {
 	unsigned long flags;
 	struct mtrr_var_range *vr;
+	struct cache_state state;
 
 	vr = &mtrr_state.var_ranges[reg];
 
 	local_irq_save(flags);
-	cache_disable();
+	cache_disable(&state);
 
 	if (size == 0) {
 		/*
@@ -770,7 +768,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 		mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
 	}
 
-	cache_enable();
+	cache_enable(&state);
 	local_irq_restore(flags);
 }
 
-- 
2.35.3

next prev parent reply	other threads:[~2022-10-04  8:13 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-04  8:10 [PATCH v4 00/16] x86: make pat and mtrr independent from each other Juergen Gross
2022-10-04  8:10 ` [PATCH v4 01/16] x86/mtrr: add comment for set_mtrr_state() serialization Juergen Gross
2022-10-04  8:10 ` [PATCH v4 02/16] x86/mtrr: remove unused cyrix_set_all() function Juergen Gross
2022-10-20 14:13   ` [tip: x86/cpu] x86/mtrr: Remove " tip-bot2 for Juergen Gross
2022-10-04  8:10 ` [PATCH v4 03/16] x86/mtrr: replace use_intel() with a local flag Juergen Gross
2022-10-21 17:19   ` Borislav Petkov
2022-10-21 18:05     ` Juergen Gross
2022-10-21 18:10       ` Borislav Petkov
2022-10-04  8:10 ` [PATCH v4 04/16] x86/mtrr: rename prepare_set() and post_set() Juergen Gross
2022-10-04  8:10 ` [PATCH v4 05/16] x86/mtrr: split MTRR specific handling from cache dis/enabling Juergen Gross
2022-10-26  9:24   ` Borislav Petkov
2022-10-26 11:42     ` Juergen Gross
2022-10-04  8:10 ` [PATCH v4 06/16] x86: move some code out of arch/x86/kernel/cpu/mtrr Juergen Gross
2022-10-04  8:10 ` [PATCH v4 07/16] x86/mtrr: split generic_set_all() Juergen Gross
2022-10-26 10:37   ` Borislav Petkov
2022-10-26 11:43     ` Juergen Gross
2022-10-26 12:10       ` Borislav Petkov
2022-10-04  8:10 ` [PATCH v4 08/16] x86/mtrr: remove set_all callback from struct mtrr_ops Juergen Gross
2022-10-27  9:18   ` Borislav Petkov
2022-10-04  8:10 ` [PATCH v4 09/16] x86/mtrr: simplify mtrr_bp_init() Juergen Gross
2022-10-27  9:32   ` Borislav Petkov
2022-10-04  8:10 ` [PATCH v4 10/16] x86/mtrr: get rid of mtrr_enabled bool Juergen Gross
2022-10-04  8:10 ` [PATCH v4 11/16] x86/mtrr: let cache_aps_delayed_init replace mtrr_aps_delayed_init Juergen Gross
2022-10-27 12:18   ` Borislav Petkov
2022-10-27 13:08     ` Juergen Gross
2022-10-04  8:10 ` [PATCH v4 12/16] x86/mtrr: add a stop_machine() handler calling only cache_cpu_init() Juergen Gross
2022-10-29 10:07   ` Borislav Petkov
2022-10-04  8:10 ` [PATCH v4 13/16] x86: decouple pat and mtrr handling Juergen Gross
2022-10-29 12:15   ` Borislav Petkov
2022-10-04  8:10 ` [PATCH v4 14/16] x86: switch cache_ap_init() to hotplug callback Juergen Gross
2022-10-04  8:10 ` Juergen Gross [this message]
2022-10-04  8:10 ` [PATCH v4 16/16] x86/mtrr: simplify mtrr_ops initialization Juergen Gross
2022-10-30 12:06   ` Borislav Petkov
2022-10-30 15:05     ` Juergen Gross
2022-10-30 16:39       ` Borislav Petkov
2022-10-30 17:48         ` Borislav Petkov
2022-11-01  9:48         ` Juergen Gross
2022-11-01 10:02           ` Borislav Petkov
2022-10-19  7:18 ` [PATCH v4 00/16] x86: make pat and mtrr independent from each other Juergen Gross

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:f6c52168753 dfblob:77ddc04c497 dfblob:ec73d1e5baf
dfblob:67c16c81325 dfblob:48ce48827f8 dfblob:84684b50a5c
dfblob:2f2485d6657 dfblob:cddb440f330 )
 OR (
bs:"[PATCH v4 15/16] x86: do MTRR/PAT setup on all secondary CPUs in parallel" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221004081023.32402-16-jgross@suse.com \
    --to=jgross@suse.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox