public inbox for linux-arm-kernel@lists.infradead.org
From: Yang Shi <yang@os.amperecomputing.com>
To: cl@gentwo.org, dennis@kernel.org, tj@kernel.org,
	urezki@gmail.com, catalin.marinas@arm.com, will@kernel.org,
	ryan.roberts@arm.com, david@kernel.org,
	akpm@linux-foundation.org, hca@linux.ibm.com, gor@linux.ibm.com,
	agordeev@linux.ibm.com
Cc: yang@os.amperecomputing.com, linux-mm@kvack.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 09/11] mm: percpu: allocate and free local percpu vm area
Date: Wed, 29 Apr 2026 10:04:37 -0700
Message-ID: <20260429170758.3018959-10-yang@os.amperecomputing.com>
In-Reply-To: <20260429170758.3018959-1-yang@os.amperecomputing.com>

Allocate the local percpu vm area.  The delta between the allocated
address (the chunk local base) and pcpu_local_base must be the same as
the delta between the chunk base and pcpu_base_addr.  Each CPU's local
percpu area is mapped in that CPU's own page table; this section of the
page table is not shared between CPUs.

Also free the local percpu vm area when the chunk is destroyed, and
unmap it from each CPU's percpu page table.
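
A minimal sketch of the invariant (the helper below is hypothetical and
for illustration only; pcpu_local_base and pcpu_base_addr are the global
bases used in this series):

	/*
	 * Hypothetical helper, not part of this patch: derive a chunk's
	 * local base from its shared vmalloc base.  This works because
	 * the local vm area is allocated at exactly
	 * pcpu_local_base + delta, as done in pcpu_create_chunk() below.
	 */
	static inline void *pcpu_chunk_local_base(struct pcpu_chunk *chunk)
	{
		unsigned long delta = (unsigned long)chunk->base_addr -
				      (unsigned long)pcpu_base_addr;

		return (void *)((unsigned long)pcpu_local_base + delta);
	}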

Signed-off-by: Yang Shi <yang@os.amperecomputing.com>
---
 include/linux/vmalloc.h |  3 ++
 mm/percpu-internal.h    |  1 +
 mm/percpu-vm.c          | 91 +++++++++++++++++++++++++++++++++++++++++
 mm/vmalloc.c            | 73 ++++++++++++++++++++++++++++---
 4 files changed, 163 insertions(+), 5 deletions(-)
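
For reviewers, a rough call-flow sketch of how the local mapping threads
through the existing map/unmap paths (illustration only; the names match
the diff below, and percpu_pgd[] comes from earlier patches in this
series):

	pcpu_map_pages(chunk, pages, page_start, page_end)
	  __pcpu_map_pages(...)                    /* shared vmalloc mapping */
	  __pcpu_map_pages_local(percpu_pgd[cpu],  /* this CPU's page table */
		  chunk->local_base + (page_start << PAGE_SHIFT), ...)

	pcpu_unmap_pages(chunk, pages, page_start, page_end)
	  __pcpu_unmap_pages(...)
	  __pcpu_unmap_pages_local(percpu_pgd[cpu], ...)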

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 3b02c0c6b371..4b53992a063c 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -311,6 +311,9 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 				     size_t align);
 
 void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms);
+struct vm_struct *pcpu_get_local_vm_area(unsigned long hint,
+				     int unit_size, size_t align);
+
 # else
 static inline struct vm_struct **
 pcpu_get_vm_areas(const unsigned long *offsets,
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 64b48b99ac06..2c560e44ee58 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -71,6 +71,7 @@ struct pcpu_chunk {
 	struct pcpu_block_md	*md_blocks;	/* metadata blocks */
 
 	void			*data;		/* chunk data */
+	void			*local_data;	/* chunk local vm */
 	bool			immutable;	/* no [de]population allowed */
 	bool			isolated;	/* isolated from active chunk
 						   slots */
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 4f5937090590..1e6b8fdcab71 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -9,6 +9,7 @@
  * This is the default chunk allocator.
  */
 #include "internal.h"
+#include "percpu-internal.h"
 
 static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
 				    unsigned int cpu, int page_idx)
@@ -130,6 +131,11 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
 	flush_cache_vunmap(
 		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
 		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
+
+#ifdef CONFIG_HAVE_LOCAL_PER_CPU_MAP
+	flush_cache_vunmap((unsigned long)chunk->local_base + (page_start << PAGE_SHIFT),
+			(unsigned long)chunk->local_base + (page_end << PAGE_SHIFT));
+#endif
 }
 
 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
@@ -137,6 +143,20 @@ static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
 	vunmap_range_noflush(addr, addr + (nr_pages << PAGE_SHIFT));
 }
 
+#ifdef CONFIG_HAVE_LOCAL_PER_CPU_MAP
+static void __pcpu_unmap_pages_local(pgd_t *pgdir, unsigned long virt,
+			    int nr_pages)
+{
+	__vunmap_range_noflush(pgdir, virt, virt + (nr_pages << PAGE_SHIFT));
+}
+#else
+static void __pcpu_unmap_pages_local(pgd_t *pgdir, unsigned long virt,
+			    int nr_pages)
+{
+	return;
+}
+#endif
+
 /**
  * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
  * @chunk: chunk of interest
@@ -166,6 +186,10 @@ static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
 		}
 		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
 				   page_end - page_start);
+
+		__pcpu_unmap_pages_local(percpu_pgd[cpu],
+					(unsigned long)chunk->local_base + (page_start << PAGE_SHIFT),
+					page_end - page_start);
 	}
 }
 
@@ -188,6 +212,12 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
 	flush_tlb_kernel_range(
 		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
 		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
+
+#ifdef CONFIG_HAVE_LOCAL_PER_CPU_MAP
+	flush_tlb_kernel_range(
+		(unsigned long)chunk->local_base + (page_start << PAGE_SHIFT),
+		(unsigned long)chunk->local_base + (page_end << PAGE_SHIFT));
+#endif
 }
 
 static int __pcpu_map_pages(unsigned long addr, struct page **pages,
@@ -197,6 +227,32 @@ static int __pcpu_map_pages(unsigned long addr, struct page **pages,
 			PAGE_KERNEL, pages, PAGE_SHIFT, GFP_KERNEL);
 }
 
+#ifdef CONFIG_HAVE_LOCAL_PER_CPU_MAP
+static int __pcpu_map_pages_local(pgd_t *pgdir, unsigned long virt, struct page **pages,
+			    int nr_pages)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < nr_pages; i++) {
+		err = vmap_range_noflush(pgdir, virt, virt + PAGE_SIZE,
+				page_to_phys(pages[i]), PAGE_KERNEL, PAGE_SHIFT);
+		if (err)
+			return err;
+
+		virt += PAGE_SIZE;
+	}
+
+	return err;
+}
+#else
+static int __pcpu_map_pages_local(pgd_t *pgdir, unsigned long virt, struct page **pages,
+			    int nr_pages)
+{
+	return 0;
+}
+#endif
+
 /**
  * pcpu_map_pages - map pages into a pcpu_chunk
  * @chunk: chunk of interest
@@ -224,6 +280,13 @@ static int pcpu_map_pages(struct pcpu_chunk *chunk,
 		if (err < 0)
 			goto err;
 
+		err = __pcpu_map_pages_local(percpu_pgd[cpu],
+					(unsigned long)chunk->local_base + (page_start << PAGE_SHIFT),
+					&pages[pcpu_page_idx(cpu, page_start)],
+					page_end - page_start);
+		if (err < 0)
+			goto err;
+
 		for (i = page_start; i < page_end; i++)
 			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
 					    chunk);
@@ -233,6 +296,9 @@ static int pcpu_map_pages(struct pcpu_chunk *chunk,
 	for_each_possible_cpu(tcpu) {
 		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
 				   page_end - page_start);
+		__pcpu_unmap_pages_local(percpu_pgd[tcpu],
+					(unsigned long)chunk->local_base + (page_start << PAGE_SHIFT),
+					page_end - page_start);
 		if (tcpu == cpu)
 			break;
 	}
@@ -258,6 +324,11 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
 	flush_cache_vmap(
 		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
 		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
+
+#ifdef CONFIG_HAVE_LOCAL_PER_CPU_MAP
+	flush_cache_vmap((unsigned long)chunk->local_base + (page_start << PAGE_SHIFT),
+			 (unsigned long)chunk->local_base + (page_end << PAGE_SHIFT));
+#endif
 }
 
 /**
@@ -349,6 +420,24 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 	chunk->data = vms;
 	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
 
+#ifdef CONFIG_HAVE_LOCAL_PER_CPU_MAP
+	unsigned long delta = (unsigned long)chunk->base_addr - (unsigned long)pcpu_base_addr;
+	unsigned long hint = delta + (unsigned long)pcpu_local_base;
+	struct vm_struct *local_vm = pcpu_get_local_vm_area(hint,
+					pcpu_unit_size, pcpu_atom_size);
+	if (!local_vm) {
+		pcpu_free_vm_areas(vms, pcpu_nr_groups);
+		pcpu_free_chunk(chunk);
+		return NULL;
+	}
+
+	chunk->local_base = local_vm->addr;
+	chunk->local_data = (void *)local_vm;
+#else
+	chunk->local_base = NULL;
+	chunk->local_data = NULL;
+#endif
+
 	pcpu_stats_chunk_alloc();
 	trace_percpu_create_chunk(chunk->base_addr);
 
@@ -365,6 +454,8 @@ static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
 
 	if (chunk->data)
 		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
+	if (chunk->local_data)
+		free_vm_area((struct vm_struct *)chunk->local_data);
 	pcpu_free_chunk(chunk);
 }
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8ef7d9987e18..f224ffec5696 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -4836,17 +4836,21 @@ pvm_find_va_enclose_addr(unsigned long addr)
  *   in - the VA we start the search(reverse order);
  *   out - the VA with the highest aligned end address.
  * @align: alignment for required highest address
+ * @pcpu: whether to allocate from the local percpu area
  *
  * Returns: determined end address within vmap_area
  */
 static unsigned long
-pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align)
+pvm_determine_end_from_reverse(struct vmap_area **va, unsigned long align, bool pcpu)
 {
 	unsigned long vmalloc_end;
 	unsigned long addr;
 
 #ifdef CONFIG_HAVE_LOCAL_PER_CPU_MAP
-	vmalloc_end = PERCPU_END & ~(align - 1);
+	if (pcpu)
+		vmalloc_end = LOCAL_PERCPU_END & ~(align - 1);
+	else
+		vmalloc_end = PERCPU_END & ~(align - 1);
 #else
 	vmalloc_end = VMALLOC_END & ~(align - 1);
 #endif
@@ -4955,7 +4959,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 	end = start + sizes[area];
 
 	va = pvm_find_va_enclose_addr(vmalloc_end);
-	base = pvm_determine_end_from_reverse(&va, align) - end;
+	base = pvm_determine_end_from_reverse(&va, align, false) - end;
 
 	while (true) {
 		/*
@@ -4976,7 +4980,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 		 * base downwards and then recheck.
 		 */
 		if (base + end > va->va_end) {
-			base = pvm_determine_end_from_reverse(&va, align) - end;
+			base = pvm_determine_end_from_reverse(&va, align, false) - end;
 			term_area = area;
 			continue;
 		}
@@ -4986,7 +4990,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 		 */
 		if (base + start < va->va_start) {
 			va = node_to_va(rb_prev(&va->rb_node));
-			base = pvm_determine_end_from_reverse(&va, align) - end;
+			base = pvm_determine_end_from_reverse(&va, align, false) - end;
 			term_area = area;
 			continue;
 		}
@@ -5149,6 +5153,65 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 		free_vm_area(vms[i]);
 	kfree(vms);
 }
+
+#ifdef CONFIG_HAVE_LOCAL_PER_CPU_MAP
+/* Find a free vm area starting at the given hint address */
+struct vm_struct *pcpu_get_local_vm_area(unsigned long hint,
+				     int unit_size, size_t align)
+{
+	struct vmap_area *tmp_va, *va;
+	struct vm_struct *vm;
+	struct vmap_node *vn;
+	unsigned long end;
+	int ret;
+
+	va = kmem_cache_zalloc(vmap_area_cachep, GFP_KERNEL);
+	vm = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
+	if (!va || !vm)
+		goto err_free;
+
+	spin_lock(&free_vmap_area_lock);
+
+	tmp_va = pvm_find_va_enclose_addr(hint);
+	if (!tmp_va)
+		goto err_unlock;
+
+	end = pvm_determine_end_from_reverse(&tmp_va, align, true);
+
+	/* The local area must sit exactly at the hint to preserve the delta. */
+	if (hint + unit_size > end)
+		goto err_unlock;
+
+	ret = va_clip(&free_vmap_area_root,
+			&free_vmap_area_list, tmp_va, hint, unit_size);
+	if (ret)
+		goto err_unlock;
+
+	va->va_start = hint;
+	va->va_end = hint + unit_size;
+
+	spin_unlock(&free_vmap_area_lock);
+
+	vn = addr_to_node(va->va_start);
+
+	spin_lock(&vn->busy.lock);
+	insert_vmap_area(va, &vn->busy.root, &vn->busy.head);
+	setup_vmalloc_vm(vm, va, VM_ALLOC,
+			 pcpu_get_local_vm_area);
+	spin_unlock(&vn->busy.lock);
+
+	return vm;
+
+err_unlock:
+	spin_unlock(&free_vmap_area_lock);
+err_free:
+	if (va)
+		kmem_cache_free(vmap_area_cachep, va);
+	kfree(vm);
+
+	return NULL;
+}
+#endif
 #endif	/* CONFIG_SMP */
 
 #ifdef CONFIG_PRINTK
-- 
2.47.0

Thread overview: 13+ messages
2026-04-29 17:04 [RFC v1 PATCH 0/11] Optimize this_cpu_*() ops for non-x86 (ARM64 for this series) Yang Shi
2026-04-29 17:04 ` [PATCH 01/11] arm64: mm: enable percpu kernel page table Yang Shi
2026-04-29 17:04 ` [PATCH 02/11] arm64: mm: define percpu virtual space area Yang Shi
2026-04-29 17:04 ` [PATCH 03/11] arm64: smp: define setup_per_cpu_areas() Yang Shi
2026-04-29 17:04 ` [PATCH 04/11] mm: percpu: prepare to use dedicated percpu area Yang Shi
2026-04-29 17:04 ` [PATCH 05/11] arm64: mm: map local percpu first chunk Yang Shi
2026-04-29 17:04 ` [PATCH 06/11] mm: percpu: set up first chunk and reserve chunk Yang Shi
2026-04-29 17:04 ` [PATCH 07/11] arm64: mm: introduce __per_cpu_local_off Yang Shi
2026-04-29 17:04 ` [PATCH 08/11] vmalloc: pass in pgd pointer for vmap{__vunmap}_range_noflush() Yang Shi
2026-04-29 17:04 ` Yang Shi [this message]
2026-04-29 17:04 ` [PATCH 10/11] arm64: kconfig: select HAVE_LOCAL_PER_CPU_MAP Yang Shi
2026-04-29 17:04 ` [PATCH 11/11] arm64: percpu: use local percpu for this_cpu_*() APIs Yang Shi
2026-04-30 19:02 ` [RFC v1 PATCH 0/11] Optimize this_cpu_*() ops for non-x86 (ARM64 for this series) Yang Shi
