From: Yang Shi <yang@os.amperecomputing.com>
To: cl@gentwo.org, dennis@kernel.org, tj@kernel.org,
urezki@gmail.com, catalin.marinas@arm.com, will@kernel.org,
ryan.roberts@arm.com, david@kernel.org,
akpm@linux-foundation.org, hca@linux.ibm.com, gor@linux.ibm.com,
agordeev@linux.ibm.com
Cc: yang@os.amperecomputing.com, linux-mm@kvack.org,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org
Subject: [PATCH 01/11] arm64: mm: enable percpu kernel page table
Date: Wed, 29 Apr 2026 10:04:29 -0700 [thread overview]
Message-ID: <20260429170758.3018959-2-yang@os.amperecomputing.com> (raw)
In-Reply-To: <20260429170758.3018959-1-yang@os.amperecomputing.com>
Currently all CPUs share the same kernel page table (swapper_pg_dir).
This patch creates a kernel page table for each CPU, and each CPU uses
its own kernel page table. CPU 0 keeps using swapper_pg_dir. All the
kernel page tables share the same content, so we don't have to
duplicate the whole page table for every CPU; we just need a
different pgd page for each CPU. All kernel page table modifications
(split, creation, deletion, etc.) still happen on swapper_pg_dir, and
a modification needs to be synchronized to the other CPUs' page tables
when the pgd level is modified.
A percpu page table can't be shared across cores, so the CnP bit is
left clear too, even when CNP is supported.
Some features, for example KPTI, may not work with this for now.
Signed-off-by: Yang Shi <yang@os.amperecomputing.com>
---
arch/arm64/include/asm/mmu.h | 1 +
arch/arm64/include/asm/mmu_context.h | 6 +++-
arch/arm64/include/asm/pgtable.h | 3 ++
arch/arm64/kernel/setup.c | 3 ++
arch/arm64/kernel/smp.c | 8 +++++
arch/arm64/mm/mmu.c | 53 ++++++++++++++++++++++++++++
6 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 5e1211c540ab..8ed3b5f3cf84 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -63,6 +63,7 @@ static inline bool arm64_kernel_unmapped_at_el0(void)
extern void arm64_memblock_init(void);
extern void paging_init(void);
extern void bootmem_init(void);
+extern void setup_percpu_pgd(void);
extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
phys_addr_t size, pgprot_t prot);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 803b68758152..0ee900eed612 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -27,6 +27,7 @@
#include <asm/tlbflush.h>
extern bool rodata_full;
+extern pgd_t *percpu_pgd[NR_CPUS];
static inline void contextidr_thread_switch(struct task_struct *next)
{
@@ -138,7 +139,10 @@ void __cpu_replace_ttbr1(pgd_t *pgdp, bool cnp);
static inline void cpu_enable_swapper_cnp(void)
{
- __cpu_replace_ttbr1(lm_alias(swapper_pg_dir), true);
+ unsigned int cpu = smp_processor_id();
+ pgd_t *ttbr1 = percpu_pgd[cpu]; /* NOTE(review): CPU 0's slot is the bare swapper_pg_dir; the old call used lm_alias() - confirm */
+
+ __cpu_replace_ttbr1(ttbr1, false); /* cnp=false: this CPU's own pgd is installed without CnP */
}
static inline void cpu_replace_ttbr1(pgd_t *pgdp)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4dfa42b7d053..38eec71ec383 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1216,6 +1216,9 @@ p4d_t *p4d_offset_lockless_folded(pgd_t *pgdp, pgd_t pgd, unsigned long addr)
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
+#define ARCH_PAGE_TABLE_SYNC_MASK \
+ (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED) /* PGD-modified bit with 5 levels, P4D-modified bit otherwise */
+
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %016llx.\n", __FILE__, __LINE__, pgd_val(e))
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 23c05dc7a8f2..6d420ad59af4 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -360,6 +360,9 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
smp_init_cpus();
smp_build_mpidr_hash();
+ /* Must be called after smp_init_cpus */
+ setup_percpu_pgd();
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Make sure init_thread_info.ttbr0 always generates translation
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 1aa324104afb..88a82eb56fb3 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -55,6 +55,8 @@
#include <trace/events/ipi.h>
+extern void idmap_cpu_replace_ttbr1(phys_addr_t pgdir);
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -198,6 +200,12 @@ asmlinkage notrace void secondary_start_kernel(void)
struct mm_struct *mm = &init_mm;
const struct cpu_operations *ops;
unsigned int cpu = smp_processor_id();
+ typedef void (ttbr_replace_func)(phys_addr_t);
+ ttbr_replace_func *replace_ttbr;
+
+ phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(percpu_pgd[cpu]));
+ replace_ttbr = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
+ replace_ttbr(ttbr1);
/*
* All kernel threads share the same mm context; grab a
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index dd85e093ffdb..ed1545baa045 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -67,6 +67,59 @@ long __section(".mmuoff.data.write") __early_cpu_boot_status;
static DEFINE_SPINLOCK(swapper_pgdir_lock);
static DEFINE_MUTEX(fixmap_lock);
+pgd_t *percpu_pgd[NR_CPUS] __ro_after_init; /* one pgd page per CPU, filled by setup_percpu_pgd() */
+bool percpu_pgd_setup_done __ro_after_init = false; /* set at the end of setup_percpu_pgd() */
+
+/*
+ * Populate percpu_pgd[] with one top-level page table page per possible CPU.
+ *
+ * CPU 0 keeps using swapper_pg_dir. Every other possible CPU gets a newly
+ * allocated page that starts out as a copy of swapper_pg_dir. Panics if a
+ * page cannot be allocated. percpu_pgd_setup_done is set once every slot
+ * has been filled.
+ */
+void __init setup_percpu_pgd(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		pgd_t *pgd = swapper_pg_dir;
+
+		if (cpu != 0) {
+			void *page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+
+			if (!page)
+				panic("Can't alloc percpu pgd\n");
+
+			memcpy(page, (void *)swapper_pg_dir, PAGE_SIZE);
+			pgd = page;
+		}
+
+		percpu_pgd[cpu] = pgd;
+	}
+
+	/*
+	 * Barrier before publishing the flag; presumably orders the pgd
+	 * copies above against later users - confirm.
+	 */
+	dsb(ishst);
+
+	percpu_pgd_setup_done = true;
+}
+
+/*
+ * arch_sync_kernel_mappings - copy modified pgd entries to the per-CPU pgds
+ * @start: first virtual address of the modified range
+ * @end: end of the modified range
+ *
+ * For each pgd entry covering the range, read the entry located via
+ * pgd_offset_k() and store it into the same slot of every other CPU's pgd
+ * page in percpu_pgd[]. CPU 0 is skipped because its percpu_pgd[] slot is
+ * swapper_pg_dir itself.
+ *
+ * Does nothing until setup_percpu_pgd() has populated percpu_pgd[].
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
+{
+	unsigned long addr, next;
+	unsigned int index;
+	pgd_t *pgdp;
+	pgd_t pgd;
+	int cpu;
+
+	/*
+	 * The walk below only stops once addr reaches end, so an inverted
+	 * range must not be walked. This is a recoverable caller error: warn
+	 * and bail out instead of taking the machine down with BUG_ON().
+	 */
+	if (WARN_ON_ONCE(start > end))
+		return;
+
+	if (!percpu_pgd_setup_done)
+		return;
+
+	pgdp = pgd_offset_k(start);
+	index = pgd_index(start);
+	addr = start;
+	do {
+		pgd = READ_ONCE(*pgdp);
+		next = pgd_addr_end(addr, end);
+		for_each_possible_cpu(cpu) {
+			if (cpu == 0)
+				continue;
+			set_pgd(percpu_pgd[cpu] + index, pgd);
+		}
+	} while (pgdp++, index++, addr = next, addr != end);
+}
+
void noinstr set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
{
pgd_t *fixmap_pgdp;
--
2.47.0
next prev parent reply other threads:[~2026-04-29 17:09 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-29 17:04 [RFC v1 PATCH 0/11] Optimize this_cpu_*() ops for non-x86 (ARM64 for this series) Yang Shi
2026-04-29 17:04 ` Yang Shi [this message]
2026-04-29 17:04 ` [PATCH 02/11] arm64: mm: define percpu virtual space area Yang Shi
2026-04-29 17:04 ` [PATCH 03/11] arm64: smp: define setup_per_cpu_areas() Yang Shi
2026-04-29 17:04 ` [PATCH 04/11] mm: percpu: prepare to use dedicated percpu area Yang Shi
2026-04-29 17:04 ` [PATCH 05/11] arm64: mm: map local percpu first chunk Yang Shi
2026-04-29 17:04 ` [PATCH 06/11] mm: percpu: set up first chunk and reserve chunk Yang Shi
2026-04-29 17:04 ` [PATCH 07/11] arm64: mm: introduce __per_cpu_local_off Yang Shi
2026-04-29 17:04 ` [PATCH 08/11] vmalloc: pass in pgd pointer for vmap{__vunmap}_range_noflush() Yang Shi
2026-04-29 17:04 ` [PATCH 09/11] mm: percpu: allocate and free local percpu vm area Yang Shi
2026-04-29 17:04 ` [PATCH 10/11] arm64: kconfig: select HAVE_LOCAL_PER_CPU_MAP Yang Shi
2026-04-29 17:04 ` [PATCH 11/11] arm64: percpu: use local percpu for this_cpu_*() APIs Yang Shi
2026-04-30 19:02 ` [RFC v1 PATCH 0/11] Optimize this_cpu_*() ops for non-x86 (ARM64 for this series) Yang Shi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260429170758.3018959-2-yang@os.amperecomputing.com \
--to=yang@os.amperecomputing.com \
--cc=agordeev@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=catalin.marinas@arm.com \
--cc=cl@gentwo.org \
--cc=david@kernel.org \
--cc=dennis@kernel.org \
--cc=gor@linux.ibm.com \
--cc=hca@linux.ibm.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ryan.roberts@arm.com \
--cc=tj@kernel.org \
--cc=urezki@gmail.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox