From: Yang Shi <yang@os.amperecomputing.com>
To: cl@gentwo.org, dennis@kernel.org, tj@kernel.org,
urezki@gmail.com, catalin.marinas@arm.com, will@kernel.org,
ryan.roberts@arm.com, david@kernel.org,
akpm@linux-foundation.org, hca@linux.ibm.com, gor@linux.ibm.com,
agordeev@linux.ibm.com
Cc: yang@os.amperecomputing.com, linux-mm@kvack.org,
linux-arm-kernel@lists.infradead.org,
linux-kernel@vger.kernel.org
Subject: [PATCH 05/11] arm64: mm: map local percpu first chunk
Date: Wed, 29 Apr 2026 10:04:33 -0700 [thread overview]
Message-ID: <20260429170758.3018959-6-yang@os.amperecomputing.com> (raw)
In-Reply-To: <20260429170758.3018959-1-yang@os.amperecomputing.com>
Allocate local percpu area and map to percpu page table for the first
chunk.
It doesn't work for PCPU_FC_EMBED because the percpu base address may
be in the linear mapping space in this case, which would result in a huge
offset being returned to the percpu allocator. So the percpu local map can
only work with PCPU_FC_PAGE, which allocates percpu variables from the
vmalloc area or the dedicated percpu area. Therefore, unselect
NEED_PER_CPU_EMBED_FIRST_CHUNK if the architecture supports the percpu
local map.
Signed-off-by: Yang Shi <yang@os.amperecomputing.com>
---
arch/arm64/Kconfig | 1 -
arch/arm64/include/asm/mmu.h | 2 ++
arch/arm64/kernel/smp.c | 25 ++-----------------------
arch/arm64/mm/mmu.c | 18 ++++++++++++++++++
mm/percpu-internal.h | 12 ++++++++++++
mm/percpu.c | 13 +++++++++++++
6 files changed, 47 insertions(+), 24 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fe60738e5943..0e12e531a5b2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1525,7 +1525,6 @@ config NUMA
select GENERIC_ARCH_NUMA
select OF_NUMA
select HAVE_SETUP_PER_CPU_AREA
- select NEED_PER_CPU_EMBED_FIRST_CHUNK
select NEED_PER_CPU_PAGE_FIRST_CHUNK
select USE_PERCPU_NUMA_NODE_ID
help
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 8ed3b5f3cf84..d81e5c483b55 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -73,6 +73,8 @@ extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
extern void mark_linear_text_alias_ro(void);
extern int split_kernel_leaf_mapping(unsigned long start, unsigned long end);
extern void linear_map_maybe_split_to_ptes(void);
+extern void map_local_percpu_first_chunk(pgd_t *pgdir, unsigned long virt,
+ struct page **pages, unsigned int nr);
/*
* This check is triggered during the early boot before the cpufeature
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 0cc8f4a9efa7..4caa6ebec12f 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -831,36 +831,15 @@ int early_cpu_to_node(int cpu)
return cpu_to_node_map[cpu];
}
-static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
-{
- return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
-}
-
void __init setup_per_cpu_areas(void)
{
unsigned long delta;
unsigned int cpu;
int rc = -EINVAL;
- if (pcpu_chosen_fc != PCPU_FC_PAGE) {
- /*
- * Always reserve area for module percpu variables. That's
- * what the legacy allocator did.
- */
- rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
- PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
- pcpu_cpu_distance,
- early_cpu_to_node);
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
- if (rc < 0)
- pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
- pcpu_fc_names[pcpu_chosen_fc], rc);
-#endif
- }
-
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
- if (rc < 0)
- rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
+ /* PCPU page table just can support PCPU_FC_PAGE */
+ rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
#endif
if (rc < 0)
panic("Failed to initialize percpu areas (err=%d).", rc);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 7708dcc1b6a9..81b662433677 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1044,6 +1044,24 @@ void __init linear_map_maybe_split_to_ptes(void)
}
}
+void __init map_local_percpu_first_chunk(pgd_t *pgdir, unsigned long virt,
+ struct page **pages, unsigned int nr)
+{
+ int i;
+
+ arch_enter_lazy_mmu_mode();
+
+ for (i = 0; i < nr; i++) {
+ phys_addr_t phys = page_to_phys(pages[i]);
+ __create_pgd_mapping_locked(pgdir, phys, virt, PAGE_SIZE, PAGE_KERNEL,
+ early_pgtable_alloc, NO_EXEC_MAPPINGS);
+
+ virt += PAGE_SIZE;
+ }
+
+ arch_leave_lazy_mmu_mode();
+}
+
/*
* This function can only be used to modify existing table entries,
* without allocating new levels of table. Note that this permits the
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 4b3d6ec43703..b33d1f5aba1b 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/memcontrol.h>
+#include <linux/mmu_context.h>
/*
* pcpu_block_md is the metadata block struct.
@@ -162,6 +163,17 @@ static inline size_t pcpu_obj_full_size(size_t size)
return size * num_possible_cpus() + extra_size;
}
+#ifdef CONFIG_HAVE_LOCAL_PER_CPU_MAP
+extern void __init map_local_percpu_first_chunk(pgd_t *pgdir, unsigned long virt,
+ struct page **pages, unsigned int nr);
+#else
+static inline void __init map_local_percpu_first_chunk(pgd_t *pgdir, unsigned long virt,
+ struct page **pages, unsigned int nr)
+{
+ return;
+}
+#endif
+
#ifdef CONFIG_PERCPU_STATS
#include <linux/spinlock.h>
diff --git a/mm/percpu.c b/mm/percpu.c
index daa2c88e6971..59682b77089c 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -3194,6 +3194,7 @@ void __init __weak pcpu_populate_pte(unsigned long addr)
int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t cpu_to_nd_fn)
{
static struct vm_struct vm;
+ static struct vm_struct pcpu_vm;
struct pcpu_alloc_info *ai;
char psize_str[16];
int unit_pages;
@@ -3247,6 +3248,10 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t
vm.addr = (void *)ALIGN(PERCPU_START, PAGE_SIZE);
vm.size = num_possible_cpus() * ai->unit_size;
vm_area_add_early(&vm);
+
+ pcpu_vm.addr = (void *)ALIGN(LOCAL_PERCPU_START, PAGE_SIZE);
+ pcpu_vm.size = ai->unit_size;
+ vm_area_add_early(&pcpu_vm);
#else
vm.flags = VM_ALLOC;
vm.size = num_possible_cpus() * ai->unit_size;
@@ -3270,6 +3275,14 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t
/* copy static data */
memcpy((void *)unit_addr, __per_cpu_start, ai->static_size);
+
+ /*
+ * Map percpu data to PERCPU map.
+ *
+ * PCPU_FC_EMBED can't support it.
+ */
+ map_local_percpu_first_chunk(percpu_pgd[unit], (unsigned long)pcpu_vm.addr,
+ &pages[unit * unit_pages], unit_pages);
}
/* we're ready, commit */
--
2.47.0
next prev parent reply other threads:[~2026-04-29 17:09 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-29 17:04 [RFC v1 PATCH 0/11] Optimize this_cpu_*() ops for non-x86 (ARM64 for this series) Yang Shi
2026-04-29 17:04 ` [PATCH 01/11] arm64: mm: enable percpu kernel page table Yang Shi
2026-04-29 17:04 ` [PATCH 02/11] arm64: mm: define percpu virtual space area Yang Shi
2026-04-29 17:04 ` [PATCH 03/11] arm64: smp: define setup_per_cpu_areas() Yang Shi
2026-04-29 17:04 ` [PATCH 04/11] mm: percpu: prepare to use dedicated percpu area Yang Shi
2026-04-29 17:04 ` Yang Shi [this message]
2026-04-29 17:04 ` [PATCH 06/11] mm: percpu: set up first chunk and reserve chunk Yang Shi
2026-04-29 17:04 ` [PATCH 07/11] arm64: mm: introduce __per_cpu_local_off Yang Shi
2026-04-29 17:04 ` [PATCH 08/11] vmalloc: pass in pgd pointer for vmap{__vunmap}_range_noflush() Yang Shi
2026-04-29 17:04 ` [PATCH 09/11] mm: percpu: allocate and free local percpu vm area Yang Shi
2026-04-29 17:04 ` [PATCH 10/11] arm64: kconfig: select HAVE_LOCAL_PER_CPU_MAP Yang Shi
2026-04-29 17:04 ` [PATCH 11/11] arm64: percpu: use local percpu for this_cpu_*() APIs Yang Shi
2026-04-30 19:02 ` [RFC v1 PATCH 0/11] Optimize this_cpu_*() ops for non-x86 (ARM64 for this series) Yang Shi
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260429170758.3018959-6-yang@os.amperecomputing.com \
--to=yang@os.amperecomputing.com \
--cc=agordeev@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=catalin.marinas@arm.com \
--cc=cl@gentwo.org \
--cc=david@kernel.org \
--cc=dennis@kernel.org \
--cc=gor@linux.ibm.com \
--cc=hca@linux.ibm.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ryan.roberts@arm.com \
--cc=tj@kernel.org \
--cc=urezki@gmail.com \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox