From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: Chuyi Zhou <zhouchuyi@bytedance.com>, Nadav Amit <nadav.amit@gmail.com>
Cc: tglx@linutronix.de, mingo@redhat.com, luto@kernel.org,
peterz@infradead.org, paulmck@kernel.org, muchun.song@linux.dev,
bp@alien8.de, dave.hansen@linux.intel.com, pbonzini@redhat.com,
clrkwllms@kernel.org, rostedt@goodmis.org,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3 10/12] x86/mm: Move flush_tlb_info back to the stack
Date: Wed, 18 Mar 2026 18:21:43 +0100 [thread overview]
Message-ID: <20260318172143.ICooJ3-U@linutronix.de> (raw)
In-Reply-To: <20260318045638.1572777-11-zhouchuyi@bytedance.com>
+Nadav, orig post https://lore.kernel.org/all/20260318045638.1572777-11-zhouchuyi@bytedance.com/
On 2026-03-18 12:56:36 [+0800], Chuyi Zhou wrote:
> Commit 3db6d5a5ecaf ("x86/mm/tlb: Remove 'struct flush_tlb_info' from the
> stack") converted flush_tlb_info from stack variable to per-CPU variable.
> This brought about a performance improvement of around 3% in extreme test.
> However, it also required that all flush_tlb* operations keep preemption
> disabled entirely to prevent concurrent modifications of flush_tlb_info.
> flush_tlb* needs to send IPIs to remote CPUs and synchronously wait for
> all remote CPUs to complete their local TLB flushes. The process could
> take tens of milliseconds when interrupts are disabled or with a large
> number of remote CPUs.
…
PeterZ wasn't too happy to revert this.
The snippet below results in the following assembly:
| 0000000000001ab0 <flush_tlb_kernel_range>:
…
| 1ac9: 48 89 e5 mov %rsp,%rbp
| 1acc: 48 83 e4 c0 and $0xffffffffffffffc0,%rsp
| 1ad0: 48 83 ec 40 sub $0x40,%rsp
so it would align it properly which should result in the same cache-line
movement. I'm not sure about the virtual-to-physical translation of the
variables as in TLB misses since here we have a virtual mapped stack and
there we have virtual mapped per-CPU memory.
Below is my quick hack. Does this work, or is it still a no? I have
no numbers so…
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 5a3cdc439e38d..4a7f40c7f939a 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -227,7 +227,7 @@ struct flush_tlb_info {
u8 stride_shift;
u8 freed_tables;
u8 trim_cpumask;
-};
+} __aligned(SMP_CACHE_BYTES);
void flush_tlb_local(void);
void flush_tlb_one_user(unsigned long addr);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 621e09d049cb9..99b70e94ec281 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1394,28 +1394,12 @@ void flush_tlb_multi(const struct cpumask *cpumask,
*/
unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);
-
-#ifdef CONFIG_DEBUG_VM
-static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
-#endif
-
-static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
- unsigned long start, unsigned long end,
- unsigned int stride_shift, bool freed_tables,
- u64 new_tlb_gen)
+static void get_flush_tlb_info(struct flush_tlb_info *info,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned int stride_shift, bool freed_tables,
+ u64 new_tlb_gen)
{
- struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);
-
-#ifdef CONFIG_DEBUG_VM
- /*
- * Ensure that the following code is non-reentrant and flush_tlb_info
- * is not overwritten. This means no TLB flushing is initiated by
- * interrupt handlers and machine-check exception handlers.
- */
- BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
-#endif
-
/*
* If the number of flushes is so large that a full flush
* would be faster, do a full flush.
@@ -1433,8 +1417,6 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
info->new_tlb_gen = new_tlb_gen;
info->initiating_cpu = smp_processor_id();
info->trim_cpumask = 0;
-
- return info;
}
static void put_flush_tlb_info(void)
@@ -1450,15 +1432,16 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned int stride_shift,
bool freed_tables)
{
- struct flush_tlb_info *info;
+ struct flush_tlb_info _info;
+ struct flush_tlb_info *info = &_info;
int cpu = get_cpu();
u64 new_tlb_gen;
/* This is also a barrier that synchronizes with switch_mm(). */
new_tlb_gen = inc_mm_tlb_gen(mm);
- info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
- new_tlb_gen);
+ get_flush_tlb_info(&_info, mm, start, end, stride_shift, freed_tables,
+ new_tlb_gen);
/*
* flush_tlb_multi() is not optimized for the common case in which only
@@ -1548,17 +1531,15 @@ static void kernel_tlb_flush_range(struct flush_tlb_info *info)
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- struct flush_tlb_info *info;
+ struct flush_tlb_info info;
- guard(preempt)();
+ get_flush_tlb_info(&info, NULL, start, end, PAGE_SHIFT, false,
+ TLB_GENERATION_INVALID);
- info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
- TLB_GENERATION_INVALID);
-
- if (info->end == TLB_FLUSH_ALL)
- kernel_tlb_flush_all(info);
+ if (info.end == TLB_FLUSH_ALL)
+ kernel_tlb_flush_all(&info);
else
- kernel_tlb_flush_range(info);
+ kernel_tlb_flush_range(&info);
put_flush_tlb_info();
}
@@ -1728,12 +1709,11 @@ EXPORT_SYMBOL_FOR_KVM(__flush_tlb_all);
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
- struct flush_tlb_info *info;
+ struct flush_tlb_info info;
int cpu = get_cpu();
-
- info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
- TLB_GENERATION_INVALID);
+ get_flush_tlb_info(&info, NULL, 0, TLB_FLUSH_ALL, 0, false,
+ TLB_GENERATION_INVALID);
/*
* flush_tlb_multi() is not optimized for the common case in which only
* a local TLB flush is needed. Optimize this use-case by calling
@@ -1743,11 +1723,11 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
invlpgb_flush_all_nonglobals();
batch->unmapped_pages = false;
} else if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
- flush_tlb_multi(&batch->cpumask, info);
+ flush_tlb_multi(&batch->cpumask, &info);
} else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
lockdep_assert_irqs_enabled();
local_irq_disable();
- flush_tlb_func(info);
+ flush_tlb_func(&info);
local_irq_enable();
}
next prev parent reply other threads:[~2026-03-18 17:21 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-18 4:56 [PATCH v3 00/12] Allow preemption during IPI completion waiting to improve real-time performance Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 01/12] smp: Disable preemption explicitly in __csd_lock_wait Chuyi Zhou
2026-03-18 14:13 ` Steven Rostedt
2026-03-18 4:56 ` [PATCH v3 02/12] smp: Enable preemption early in smp_call_function_single Chuyi Zhou
2026-03-18 14:14 ` Steven Rostedt
2026-03-19 2:30 ` Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 03/12] smp: Remove get_cpu from smp_call_function_any Chuyi Zhou
2026-03-18 14:32 ` Steven Rostedt
2026-03-18 15:39 ` Sebastian Andrzej Siewior
2026-03-18 4:56 ` [PATCH v3 04/12] smp: Use on-stack cpumask in smp_call_function_many_cond Chuyi Zhou
2026-03-18 14:38 ` Steven Rostedt
2026-03-18 15:55 ` Sebastian Andrzej Siewior
2026-03-19 3:02 ` Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 05/12] smp: Free call_function_data via RCU in smpcfd_dead_cpu Chuyi Zhou
2026-03-18 16:19 ` Sebastian Andrzej Siewior
2026-03-19 7:48 ` Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 06/12] smp: Enable preemption early in smp_call_function_many_cond Chuyi Zhou
2026-03-18 16:55 ` Sebastian Andrzej Siewior
2026-03-19 3:46 ` Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 07/12] smp: Remove preempt_disable from smp_call_function Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 08/12] smp: Remove preempt_disable from on_each_cpu_cond_mask Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 09/12] scftorture: Remove preempt_disable in scftorture_invoke_one Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 10/12] x86/mm: Move flush_tlb_info back to the stack Chuyi Zhou
2026-03-18 17:21 ` Sebastian Andrzej Siewior [this message]
2026-03-18 20:24 ` Nadav Amit
2026-03-18 22:28 ` Nadav Amit
2026-03-19 8:49 ` Sebastian Andrzej Siewior
2026-03-19 10:37 ` Nadav Amit
2026-03-19 10:58 ` Sebastian Andrzej Siewior
2026-03-19 13:41 ` Chuyi Zhou
2026-03-19 14:40 ` Sebastian Andrzej Siewior
2026-03-20 14:33 ` Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 11/12] x86/mm: Enable preemption during native_flush_tlb_multi Chuyi Zhou
2026-03-18 4:56 ` [PATCH v3 12/12] x86/mm: Enable preemption during flush_tlb_kernel_range Chuyi Zhou
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260318172143.ICooJ3-U@linutronix.de \
--to=bigeasy@linutronix.de \
--cc=bp@alien8.de \
--cc=clrkwllms@kernel.org \
--cc=dave.hansen@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@redhat.com \
--cc=muchun.song@linux.dev \
--cc=nadav.amit@gmail.com \
--cc=paulmck@kernel.org \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=zhouchuyi@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox