All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] x86: Optimize load_mm_cr4 to load_mm_cr4_irqsoff
@ 2019-06-18  7:32 Jan Kiszka
  2019-06-18 14:41 ` Dave Hansen
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Jan Kiszka @ 2019-06-18  7:32 UTC (permalink / raw)
  To: Thomas Gleixner, Borislav Petkov, H. Peter Anvin, x86
  Cc: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Alexander Shishkin, Jiri Olsa, Namhyung Kim, Dave Hansen,
	Andy Lutomirski, linux-kernel

From: Jan Kiszka <jan.kiszka@siemens.com>

We only call load_mm_cr4 with interrupts disabled:
 - switch_mm_irqs_off
 - refresh_pce (on_each_cpu callback)

Thus, we can avoid disabling interrupts again in cr4_set/clear_bits.

Instead, provide cr4_set/clear_bits_irqsoffs and call those helpers from
load_mm_cr4_irqsoff. The renaming in combination with the checks
in __cr4_set shall ensure that any changes in the boundary conditions of
the invocations will be detected.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

Found while porting Xenomai with its virtualized interrupt
infrastructure to a newer kernel.

 arch/x86/events/core.c             |  2 +-
 arch/x86/include/asm/mmu_context.h |  8 ++++----
 arch/x86/include/asm/tlbflush.h    | 30 +++++++++++++++++++++++-------
 arch/x86/mm/tlb.c                  |  2 +-
 4 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index f315425d8468..78a3fba28c62 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2161,7 +2161,7 @@ static int x86_pmu_event_init(struct perf_event *event)
 
 static void refresh_pce(void *ignored)
 {
-	load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
+	load_mm_cr4_irqsoff(this_cpu_read(cpu_tlbstate.loaded_mm));
 }
 
 static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 9024236693d2..16ae821483c8 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -28,16 +28,16 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
 
 DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
 
-static inline void load_mm_cr4(struct mm_struct *mm)
+static inline void load_mm_cr4_irqsoff(struct mm_struct *mm)
 {
 	if (static_branch_unlikely(&rdpmc_always_available_key) ||
 	    atomic_read(&mm->context.perf_rdpmc_allowed))
-		cr4_set_bits(X86_CR4_PCE);
+		cr4_set_bits_irqsoff(X86_CR4_PCE);
 	else
-		cr4_clear_bits(X86_CR4_PCE);
+		cr4_clear_bits_irqsoff(X86_CR4_PCE);
 }
 #else
-static inline void load_mm_cr4(struct mm_struct *mm) {}
+static inline void load_mm_cr4_irqsoff(struct mm_struct *mm) {}
 #endif
 
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index dee375831962..6f66d841262d 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -290,26 +290,42 @@ static inline void __cr4_set(unsigned long cr4)
 }
 
 /* Set in this cpu's CR4. */
-static inline void cr4_set_bits(unsigned long mask)
+static inline void cr4_set_bits_irqsoff(unsigned long mask)
 {
-	unsigned long cr4, flags;
+	unsigned long cr4;
 
-	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	if ((cr4 | mask) != cr4)
 		__cr4_set(cr4 | mask);
-	local_irq_restore(flags);
 }
 
 /* Clear in this cpu's CR4. */
-static inline void cr4_clear_bits(unsigned long mask)
+static inline void cr4_clear_bits_irqsoff(unsigned long mask)
 {
-	unsigned long cr4, flags;
+	unsigned long cr4;
 
-	local_irq_save(flags);
 	cr4 = this_cpu_read(cpu_tlbstate.cr4);
 	if ((cr4 & ~mask) != cr4)
 		__cr4_set(cr4 & ~mask);
+}
+
+/* Set in this cpu's CR4. */
+static inline void cr4_set_bits(unsigned long mask)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cr4_set_bits_irqsoff(mask);
+	local_irq_restore(flags);
+}
+
+/* Clear in this cpu's CR4. */
+static inline void cr4_clear_bits(unsigned long mask)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	cr4_clear_bits_irqsoff(mask);
 	local_irq_restore(flags);
 }
 
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 91f6db92554c..8fc1eaa55b6e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -440,7 +440,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 
 	if (next != real_prev) {
-		load_mm_cr4(next);
+		load_mm_cr4_irqsoff(next);
 		switch_ldt(real_prev, next);
 	}
 }
-- 
2.16.4

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-07-24 13:22 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-06-18  7:32 [PATCH] x86: Optimize load_mm_cr4 to load_mm_cr4_irqsoff Jan Kiszka
2019-06-18 14:41 ` Dave Hansen
2019-06-18 14:56   ` Jan Kiszka
2019-07-23 18:52 ` Jan Kiszka
2019-07-24 10:19   ` Thomas Gleixner
2019-07-24 13:22 ` [tip:x86/mm] x86/mm: Avoid redundant interrupt disable in load_mm_cr4() tip-bot for Jan Kiszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.