From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: Chuyi Zhou <zhouchuyi@bytedance.com>, Nadav Amit <nadav.amit@gmail.com>
Cc: tglx@linutronix.de, mingo@redhat.com, luto@kernel.org,
peterz@infradead.org, paulmck@kernel.org, muchun.song@linux.dev,
bp@alien8.de, dave.hansen@linux.intel.com, pbonzini@redhat.com,
clrkwllms@kernel.org, rostedt@goodmis.org,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3 10/12] x86/mm: Move flush_tlb_info back to the stack
Date: Wed, 18 Mar 2026 18:21:43 +0100 [thread overview]
Message-ID: <20260318172143.ICooJ3-U@linutronix.de> (raw)
In-Reply-To: <20260318045638.1572777-11-zhouchuyi@bytedance.com>
+Nadav, orig post https://lore.kernel.org/all/20260318045638.1572777-11-zhouchuyi@bytedance.com/
On 2026-03-18 12:56:36 [+0800], Chuyi Zhou wrote:
> Commit 3db6d5a5ecaf ("x86/mm/tlb: Remove 'struct flush_tlb_info' from the
> stack") converted flush_tlb_info from stack variable to per-CPU variable.
> This brought about a performance improvement of around 3% in extreme test.
> However, it also required that all flush_tlb* operations keep preemption
> disabled entirely to prevent concurrent modifications of flush_tlb_info.
> flush_tlb* needs to send IPIs to remote CPUs and synchronously wait for
> all remote CPUs to complete their local TLB flushes. The process could
> take tens of milliseconds when interrupts are disabled or with a large
> number of remote CPUs.
…
PeterZ wasn't too happy to revert this.
The snippet below results in the following assembly:
| 0000000000001ab0 <flush_tlb_kernel_range>:
…
| 1ac9: 48 89 e5 mov %rsp,%rbp
| 1acc: 48 83 e4 c0 and $0xffffffffffffffc0,%rsp
| 1ad0: 48 83 ec 40 sub $0x40,%rsp
so it would align it properly which should result in the same cache-line
movement. I'm not sure about the virtual-to-physical translation of the
variables as in TLB misses since here we have a virtual mapped stack and
there we have virtual mapped per-CPU memory.
Below is my quick hack. Does this work, or is it still a no? I have
no numbers so…
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 5a3cdc439e38d..4a7f40c7f939a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -227,7 +227,7 @@ struct flush_tlb_info {
u8 stride_shift;
u8 freed_tables;
u8 trim_cpumask;
-};
+} __aligned(SMP_CACHE_BYTES);
void flush_tlb_local(void);
void flush_tlb_one_user(unsigned long addr);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 621e09d049cb9..99b70e94ec281 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1394,28 +1394,12 @@ void flush_tlb_multi(const struct cpumask *cpumask,
*/
unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);
-
-#ifdef CONFIG_DEBUG_VM
-static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
-#endif
-
-static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
- unsigned long start, unsigned long end,
- unsigned int stride_shift, bool freed_tables,
- u64 new_tlb_gen)
+static void get_flush_tlb_info(struct flush_tlb_info *info,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned int stride_shift, bool freed_tables,
+ u64 new_tlb_gen)
{
- struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);
-
-#ifdef CONFIG_DEBUG_VM
- /*
- * Ensure that the following code is non-reentrant and flush_tlb_info
- * is not overwritten. This means no TLB flushing is initiated by
- * interrupt handlers and machine-check exception handlers.
- */
- BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
-#endif
-
/*
* If the number of flushes is so large that a full flush
* would be faster, do a full flush.
@@ -1433,8 +1417,6 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
info->new_tlb_gen = new_tlb_gen;
info->initiating_cpu = smp_processor_id();
info->trim_cpumask = 0;
-
- return info;
}
static void put_flush_tlb_info(void)
@@ -1450,15 +1432,16 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned int stride_shift,
bool freed_tables)
{
- struct flush_tlb_info *info;
+ struct flush_tlb_info _info;
+ struct flush_tlb_info *info = &_info;
int cpu = get_cpu();
u64 new_tlb_gen;
/* This is also a barrier that synchronizes with switch_mm(). */
new_tlb_gen = inc_mm_tlb_gen(mm);
- info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
- new_tlb_gen);
+ get_flush_tlb_info(&_info, mm, start, end, stride_shift, freed_tables,
+ new_tlb_gen);
/*
* flush_tlb_multi() is not optimized for the common case in which only
@@ -1548,17 +1531,15 @@ static void kernel_tlb_flush_range(struct flush_tlb_info *info)
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- struct flush_tlb_info *info;
+ struct flush_tlb_info info;
- guard(preempt)();
+ get_flush_tlb_info(&info, NULL, start, end, PAGE_SHIFT, false,
+ TLB_GENERATION_INVALID);
- info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
- TLB_GENERATION_INVALID);
-
- if (info->end == TLB_FLUSH_ALL)
- kernel_tlb_flush_all(info);
+ if (info.end == TLB_FLUSH_ALL)
+ kernel_tlb_flush_all(&info);
else
- kernel_tlb_flush_range(info);
+ kernel_tlb_flush_range(&info);
put_flush_tlb_info();
}
@@ -1728,12 +1709,11 @@ EXPORT_SYMBOL_FOR_KVM(__flush_tlb_all);
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
- struct flush_tlb_info *info;
+ struct flush_tlb_info info;
int cpu = get_cpu();
-
- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
- TLB_GENERATION_INVALID);
+ get_flush_tlb_info(&info, NULL, 0, TLB_FLUSH_ALL, 0, false,
+ TLB_GENERATION_INVALID);
/*
* flush_tlb_multi() is not optimized for the common case in which only
* a local TLB flush is needed. Optimize this use-case by calling
@@ -1743,11 +1723,11 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
invlpgb_flush_all_nonglobals();
batch->unmapped_pages = false;
} else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
- flush_tlb_multi(&batch->cpumask, info);
+ flush_tlb_multi(&batch->cpumask, &info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
local_irq_disable();
- flush_tlb_func(info);
+ flush_tlb_func(&info);
local_irq_enable();
}
next prev parent reply other threads:[~2026-03-18 17:21 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-18 4:56 [PATCH v3 00/12] Allow preemption during IPI completion waiting to improve real-time performance Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 01/12] smp: Disable preemption explicitly in __csd_lock_wait Chuyi Zhou
2026-03-18 14:13 ` Steven Rostedt
2026-03-18 4:56 ` [PATCH v3 02/12] smp: Enable preemption early in smp_call_function_single Chuyi Zhou
2026-03-18 14:14 ` Steven Rostedt
2026-03-19 2:30 ` Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 03/12] smp: Remove get_cpu from smp_call_function_any Chuyi Zhou
2026-03-18 14:32 ` Steven Rostedt
2026-03-18 15:39 ` Sebastian Andrzej Siewior
2026-03-18 4:56 ` [PATCH v3 04/12] smp: Use on-stack cpumask in smp_call_function_many_cond Chuyi Zhou
2026-03-18 14:38 ` Steven Rostedt
2026-03-18 15:55 ` Sebastian Andrzej Siewior
2026-03-19 3:02 ` Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 05/12] smp: Free call_function_data via RCU in smpcfd_dead_cpu Chuyi Zhou
2026-03-18 16:19 ` Sebastian Andrzej Siewior
2026-03-19 7:48 ` Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 06/12] smp: Enable preemption early in smp_call_function_many_cond Chuyi Zhou
2026-03-18 16:55 ` Sebastian Andrzej Siewior
2026-03-19 3:46 ` Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 07/12] smp: Remove preempt_disable from smp_call_function Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 08/12] smp: Remove preempt_disable from on_each_cpu_cond_mask Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 09/12] scftorture: Remove preempt_disable in scftorture_invoke_one Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 10/12] x86/mm: Move flush_tlb_info back to the stack Chuyi Zhou
2026-03-18 17:21 ` Sebastian Andrzej Siewior [this message]
2026-03-18 20:24 ` Nadav Amit
2026-03-18 22:28 ` Nadav Amit
2026-03-19 8:49 ` Sebastian Andrzej Siewior
2026-03-19 10:37 ` Nadav Amit
2026-03-19 10:58 ` Sebastian Andrzej Siewior
2026-03-19 13:41 ` Chuyi Zhou
2026-03-19 14:40 ` Sebastian Andrzej Siewior
2026-03-20 14:33 ` Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 11/12] x86/mm: Enable preemption during native_flush_tlb_multi Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 12/12] x86/mm: Enable preemption during flush_tlb_kernel_range Chuyi Zhou
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260318172143.ICooJ3-U@linutronix.de \
--to=bigeasy@linutronix.de \
--cc=bp@alien8.de \
--cc=clrkwllms@kernel.org \
--cc=dave.hansen@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@redhat.com \
--cc=muchun.song@linux.dev \
--cc=nadav.amit@gmail.com \
--cc=paulmck@kernel.org \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=zhouchuyi@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox