All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dev Jain <dev.jain@arm.com>
To: "Barry Song (Xiaomi)" <baohua@kernel.org>,
	linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org,
	catalin.marinas@arm.com, will@kernel.org,
	akpm@linux-foundation.org, urezki@gmail.com
Cc: linux-kernel@vger.kernel.org, anshuman.khandual@arm.com,
	ryan.roberts@arm.com, ajd@linux.ibm.com, rppt@kernel.org,
	david@kernel.org, Xueyuan.chen21@gmail.com
Subject: Re: [RFC PATCH 5/8] mm/vmalloc: map contiguous pages in batches for vmap() if possible
Date: Wed, 8 Apr 2026 09:49:51 +0530	[thread overview]
Message-ID: <b297383a-731c-4efa-95b7-0f198b098d11@arm.com> (raw)
In-Reply-To: <20260408025115.27368-6-baohua@kernel.org>



On 08/04/26 8:21 am, Barry Song (Xiaomi) wrote:
> In many cases, the pages passed to vmap() may include high-order
> pages allocated with __GFP_COMP flags. For example, the systemheap
> often allocates pages in descending order: order 8, then 4, then 0.
> Currently, vmap() iterates over every page individually—even pages
> inside a high-order block are handled one by one.
> 
> This patch detects high-order pages and maps them as a single
> contiguous block whenever possible.
> 
> An alternative would be to implement a new API, vmap_sg(), but that
> change seems to be large in scope.
> 
> Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
> ---

Coincidentally, I was working on the same thing :)

We have a usecase regarding Arm TRBE and SPE aux buffers.

I'll take a look at your patches later, but my implementation is the
following, if you have any comments. I have squashed the patches into
a single diff.



From ccb9670a52b7f50b1f1e07b579a1316f76b84811 Mon Sep 17 00:00:00 2001
From: Dev Jain <dev.jain@arm.com>
Date: Thu, 26 Feb 2026 16:21:29 +0530
Subject: [PATCH] arm64/perf: map AUX buffer with large pages

Signed-off-by: Dev Jain <dev.jain@arm.com>
---
 .../hwtracing/coresight/coresight-etm-perf.c  |  3 +-
 drivers/hwtracing/coresight/coresight-trbe.c  |  3 +-
 drivers/perf/arm_spe_pmu.c                    |  5 +-
 mm/vmalloc.c                                  | 86 ++++++++++++++++---
 4 files changed, 79 insertions(+), 18 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 72017dcc3b7f1..e90a430af86bb 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -984,7 +984,8 @@ int __init etm_perf_init(void)

 	etm_pmu.capabilities		= (PERF_PMU_CAP_EXCLUSIVE |
 					   PERF_PMU_CAP_ITRACE |
-					   PERF_PMU_CAP_AUX_PAUSE);
+					   PERF_PMU_CAP_AUX_PAUSE |
+					   PERF_PMU_CAP_AUX_PREFER_LARGE);

 	etm_pmu.attr_groups		= etm_pmu_attr_groups;
 	etm_pmu.task_ctx_nr		= perf_sw_context;
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
index 1511f8eb95afb..74e6ad891e236 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.c
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -760,7 +760,8 @@ static void *arm_trbe_alloc_buffer(struct coresight_device *csdev,
 	for (i = 0; i < nr_pages; i++)
 		pglist[i] = virt_to_page(pages[i]);

-	buf->trbe_base = (unsigned long)vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
+	buf->trbe_base = (unsigned long)vmap(pglist, nr_pages,
+			 VM_MAP | VM_ALLOW_HUGE_VMAP, PAGE_KERNEL);
 	if (!buf->trbe_base) {
 		kfree(pglist);
 		kfree(buf);
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index dbd0da1116390..90c349fd66b2c 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -1027,7 +1027,7 @@ static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages,
 	for (i = 0; i < nr_pages; ++i)
 		pglist[i] = virt_to_page(pages[i]);

-	buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
+	buf->base = vmap(pglist, nr_pages, VM_MAP | VM_ALLOW_HUGE_VMAP, PAGE_KERNEL);
 	if (!buf->base)
 		goto out_free_pglist;

@@ -1064,7 +1064,8 @@ static int arm_spe_pmu_perf_init(struct arm_spe_pmu *spe_pmu)
 	spe_pmu->pmu = (struct pmu) {
 		.module = THIS_MODULE,
 		.parent		= &spe_pmu->pdev->dev,
-		.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
+		.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE |
+				  PERF_PMU_CAP_AUX_PREFER_LARGE,
 		.attr_groups	= arm_spe_pmu_attr_groups,
 		/*
 		 * We hitch a ride on the software context here, so that
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 61caa55a44027..8482463d41203 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -660,14 +660,14 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
 		pgprot_t prot, struct page **pages, unsigned int page_shift)
 {
 	unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
-
+	unsigned long step = 1UL << (page_shift - PAGE_SHIFT);
 	WARN_ON(page_shift < PAGE_SHIFT);

 	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
 			page_shift == PAGE_SHIFT)
 		return vmap_small_pages_range_noflush(addr, end, prot, pages);

-	for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
+	for (i = 0; i < ALIGN_DOWN(nr, step); i += step) {
 		int err;

 		err = vmap_range_noflush(addr, addr + (1UL << page_shift),
@@ -678,8 +678,9 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,

 		addr += 1UL << page_shift;
 	}
-
-	return 0;
+	if (IS_ALIGNED(nr, step))
+		return 0;
+	return vmap_small_pages_range_noflush(addr, end, prot, pages + i);
 }

 int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
@@ -3514,6 +3515,50 @@ void vunmap(const void *addr)
 }
 EXPORT_SYMBOL(vunmap);

+static inline unsigned int vm_shift(pgprot_t prot, unsigned long size)
+{
+	if (arch_vmap_pmd_supported(prot) && size >= PMD_SIZE)
+		return PMD_SHIFT;
+
+	return arch_vmap_pte_supported_shift(size);
+}
+
+static inline int __vmap_huge(struct page **pages, pgprot_t prot,
+		unsigned long addr, unsigned int count)
+{
+	unsigned int i = 0;
+	unsigned int shift;
+	unsigned long nr;
+
+	while (i < count) {
+		nr = num_pages_contiguous(pages + i, count - i);
+		shift = vm_shift(prot, nr << PAGE_SHIFT);
+		if (vmap_pages_range(addr, addr + (nr << PAGE_SHIFT),
+				     pgprot_nx(prot), pages + i, shift) < 0) {
+			return 1;
+		}
+		i += nr;
+		addr += (nr << PAGE_SHIFT);
+	}
+	return 0;
+}
+
+static unsigned long max_contiguous_stride_order(struct page **pages,
+		pgprot_t prot, unsigned int count)
+{
+	unsigned long max_shift = PAGE_SHIFT;
+	unsigned int i = 0;
+
+	while (i < count) {
+		unsigned long nr = num_pages_contiguous(pages + i, count - i);
+		unsigned long shift = vm_shift(prot, nr << PAGE_SHIFT);
+
+		max_shift = max(max_shift, shift);
+		i += nr;
+	}
+	return max_shift;
+}
+
 /**
  * vmap - map an array of pages into virtually contiguous space
  * @pages: array of page pointers
@@ -3552,15 +3597,32 @@ void *vmap(struct page **pages, unsigned int count,
 		return NULL;

 	size = (unsigned long)count << PAGE_SHIFT;
-	area = get_vm_area_caller(size, flags, __builtin_return_address(0));
+	if (flags & VM_ALLOW_HUGE_VMAP) {
+		/* determine from page array, the max alignment */
+		unsigned long max_shift = max_contiguous_stride_order(pages, prot, count);
+
+		area = __get_vm_area_node(size, 1 << max_shift, max_shift, flags,
+					  VMALLOC_START, VMALLOC_END, NUMA_NO_NODE,
+					  GFP_KERNEL, __builtin_return_address(0));
+	} else {
+		area = get_vm_area_caller(size, flags, __builtin_return_address(0));
+	}
 	if (!area)
 		return NULL;

 	addr = (unsigned long)area->addr;
-	if (vmap_pages_range(addr, addr + size, pgprot_nx(prot),
-				pages, PAGE_SHIFT) < 0) {
-		vunmap(area->addr);
-		return NULL;
+
+	if (flags & VM_ALLOW_HUGE_VMAP) {
+		if (__vmap_huge(pages, prot, addr, count)) {
+			vunmap(area->addr);
+			return NULL;
+		}
+	} else {
+		if (vmap_pages_range(addr, addr + size, pgprot_nx(prot),
+					pages, PAGE_SHIFT) < 0) {
+			vunmap(area->addr);
+			return NULL;
+		}
 	}

 	if (flags & VM_MAP_PUT_PAGES) {
@@ -4011,11 +4073,7 @@ void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
 		 * their allocations due to apply_to_page_range not
 		 * supporting them.
 		 */
-
-		if (arch_vmap_pmd_supported(prot) && size >= PMD_SIZE)
-			shift = PMD_SHIFT;
-		else
-			shift = arch_vmap_pte_supported_shift(size);
+		shift = vm_shift(prot, size);

 		align = max(original_align, 1UL << shift);
 	}
-- 
2.34.1



>  mm/vmalloc.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 49 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index eba436386929..e8dbfada42bc 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -3529,6 +3529,53 @@ void vunmap(const void *addr)
>  }
>  EXPORT_SYMBOL(vunmap);
>  
> +static inline int get_vmap_batch_order(struct page **pages,
> +		unsigned int max_steps, unsigned int idx)
> +{
> +	unsigned int nr_pages;
> +
> +	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP) ||
> +			ioremap_max_page_shift == PAGE_SHIFT)
> +		return 0;
> +
> +	nr_pages = compound_nr(pages[idx]);
> +	if (nr_pages == 1 || max_steps < nr_pages)
> +		return 0;
> +
> +	if (num_pages_contiguous(&pages[idx], nr_pages) == nr_pages)
> +		return compound_order(pages[idx]);
> +	return 0;
> +}
> +
> +static int vmap_contig_pages_range(unsigned long addr, unsigned long end,
> +		pgprot_t prot, struct page **pages)
> +{
> +	unsigned int count = (end - addr) >> PAGE_SHIFT;
> +	int err;
> +
> +	err = kmsan_vmap_pages_range_noflush(addr, end, prot, pages,
> +						PAGE_SHIFT, GFP_KERNEL);
> +	if (err)
> +		goto out;
> +
> +	for (unsigned int i = 0; i < count; ) {
> +		unsigned int shift = PAGE_SHIFT +
> +			get_vmap_batch_order(pages, count - i, i);
> +
> +		err = vmap_range_noflush(addr, addr + (1UL << shift),
> +				page_to_phys(pages[i]), prot, shift);
> +		if (err)
> +			goto out;
> +
> +		addr += 1UL << shift;
> +		i += 1U << (shift - PAGE_SHIFT);
> +	}
> +
> +out:
> +	flush_cache_vmap(addr, end);
> +	return err;
> +}
> +
>  /**
>   * vmap - map an array of pages into virtually contiguous space
>   * @pages: array of page pointers
> @@ -3572,8 +3619,8 @@ void *vmap(struct page **pages, unsigned int count,
>  		return NULL;
>  
>  	addr = (unsigned long)area->addr;
> -	if (vmap_pages_range(addr, addr + size, pgprot_nx(prot),
> -				pages, PAGE_SHIFT) < 0) {
> +	if (vmap_contig_pages_range(addr, addr + size, pgprot_nx(prot),
> +				pages) < 0) {
>  		vunmap(area->addr);
>  		return NULL;
>  	}



  reply	other threads:[~2026-04-08  4:20 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-08  2:51 [RFC PATCH 0/8] mm/vmalloc: Speed up ioremap, vmalloc and vmap with contiguous memory Barry Song (Xiaomi)
2026-04-08  2:51 ` [RFC PATCH 1/8] arm64/hugetlb: Extend batching of multiple CONT_PTE in a single PTE setup Barry Song (Xiaomi)
2026-04-08 10:32   ` Dev Jain
2026-04-08 11:00     ` Barry Song
2026-04-08  2:51 ` [RFC PATCH 2/8] arm64/vmalloc: Allow arch_vmap_pte_range_map_size to batch multiple CONT_PTE Barry Song (Xiaomi)
2026-04-08  2:51 ` [RFC PATCH 3/8] mm/vmalloc: Extend vmap_small_pages_range_noflush() to support larger page_shift sizes Barry Song (Xiaomi)
2026-04-08 11:08   ` Dev Jain
2026-04-08 21:29     ` Barry Song
2026-04-13 16:08   ` Mike Rapoport
2026-04-13 20:16     ` Barry Song
2026-04-08  2:51 ` [RFC PATCH 4/8] mm/vmalloc: Eliminate page table zigzag for huge vmalloc mappings Barry Song (Xiaomi)
2026-04-13 16:16   ` Mike Rapoport
2026-04-13 19:49     ` Barry Song
2026-04-08  2:51 ` [RFC PATCH 5/8] mm/vmalloc: map contiguous pages in batches for vmap() if possible Barry Song (Xiaomi)
2026-04-08  4:19   ` Dev Jain [this message]
2026-04-08  5:12     ` Barry Song
2026-04-08 11:22       ` Dev Jain
2026-04-08 14:03   ` Dev Jain
2026-04-08 21:54     ` Barry Song
2026-04-09 10:10       ` Dev Jain
2026-04-09 10:20       ` Uladzislau Rezki
2026-04-10  1:02         ` Barry Song
2026-04-13 19:23           ` David Hildenbrand (Arm)
2026-04-13 19:56             ` Barry Song
2026-04-08  2:51 ` [RFC PATCH 6/8] mm/vmalloc: align vm_area so vmap() can batch mappings Barry Song (Xiaomi)
2026-04-08  2:51 ` [RFC PATCH 7/8] mm/vmalloc: Coalesce same page_shift mappings in vmap to avoid pgtable zigzag Barry Song (Xiaomi)
2026-04-08 11:36   ` Dev Jain
2026-04-08 21:58     ` Barry Song
2026-04-08  2:51 ` [RFC PATCH 8/8] mm/vmalloc: Stop scanning for compound pages after encountering small pages in vmap Barry Song (Xiaomi)
2026-04-08  9:14 ` [RFC PATCH 0/8] mm/vmalloc: Speed up ioremap, vmalloc and vmap with contiguous memory Dev Jain
2026-04-08 10:51   ` Barry Song
2026-04-08 10:55     ` Dev Jain
2026-04-27 15:04 ` Dev Jain
2026-04-28  3:16   ` Barry Song
2026-04-28  4:14     ` Dev Jain

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b297383a-731c-4efa-95b7-0f198b098d11@arm.com \
    --to=dev.jain@arm.com \
    --cc=Xueyuan.chen21@gmail.com \
    --cc=ajd@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=anshuman.khandual@arm.com \
    --cc=baohua@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=david@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=rppt@kernel.org \
    --cc=ryan.roberts@arm.com \
    --cc=urezki@gmail.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.