All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Barry Song (Xiaomi)" <baohua@kernel.org>
To: linux-mm@kvack.org, linux-arm-kernel@lists.infradead.org,
	catalin.marinas@arm.com, will@kernel.org,
	akpm@linux-foundation.org, urezki@gmail.com
Cc: linux-kernel@vger.kernel.org, anshuman.khandual@arm.com,
	ryan.roberts@arm.com, ajd@linux.ibm.com, rppt@kernel.org,
	david@kernel.org, Xueyuan.chen21@gmail.com,
	"Barry Song (Xiaomi)" <baohua@kernel.org>
Subject: [RFC PATCH 3/8] mm/vmalloc: Extend vmap_small_pages_range_noflush() to support larger page_shift sizes
Date: Wed,  8 Apr 2026 10:51:10 +0800	[thread overview]
Message-ID: <20260408025115.27368-4-baohua@kernel.org> (raw)
In-Reply-To: <20260408025115.27368-1-baohua@kernel.org>

vmap_small_pages_range_noflush() provides a clean interface by taking
struct page **pages and mapping them via direct PTE iteration. This
avoids the page table zigzag seen when using
vmap_range_noflush() for page_shift values other than PAGE_SHIFT.

Extend it to support larger page_shift values, and add PMD- and
contiguous-PTE mappings as well.

Signed-off-by: Barry Song (Xiaomi) <baohua@kernel.org>
---
 mm/vmalloc.c | 54 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 57eae99d9909..5bf072297536 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -524,8 +524,9 @@ void vunmap_range(unsigned long addr, unsigned long end)
 
 static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
 		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
-		pgtbl_mod_mask *mask)
+		pgtbl_mod_mask *mask, unsigned int shift)
 {
+	unsigned int steps = 1;
 	int err = 0;
 	pte_t *pte;
 
@@ -543,6 +544,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	do {
 		struct page *page = pages[*nr];
 
+		steps = 1;
 		if (WARN_ON(!pte_none(ptep_get(pte)))) {
 			err = -EBUSY;
 			break;
@@ -556,9 +558,24 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
 			break;
 		}
 
+#ifdef CONFIG_HUGETLB_PAGE
+		if (shift != PAGE_SHIFT) {
+			unsigned long pfn = page_to_pfn(page), size;
+
+			size = arch_vmap_pte_range_map_size(addr, end, pfn, shift);
+			if (size != PAGE_SIZE) {
+				steps = size >> PAGE_SHIFT;
+				pte_t entry = pfn_pte(pfn, prot);
+
+				entry = arch_make_huge_pte(entry, ilog2(size), 0);
+				set_huge_pte_at(&init_mm, addr, pte, entry, size);
+				continue;
+			}
+		}
+#endif
+
 		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
-		(*nr)++;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (pte += steps, *nr += steps, addr += PAGE_SIZE * steps, addr != end);
 
 	lazy_mmu_mode_disable();
 	*mask |= PGTBL_PTE_MODIFIED;
@@ -568,7 +585,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
 
 static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
 		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
-		pgtbl_mod_mask *mask)
+		pgtbl_mod_mask *mask, unsigned int shift)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -578,7 +595,20 @@ static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = pmd_addr_end(addr, end);
-		if (vmap_pages_pte_range(pmd, addr, next, prot, pages, nr, mask))
+
+		if (shift == PMD_SHIFT) {
+			struct page *page = pages[*nr];
+			phys_addr_t phys_addr = page_to_phys(page);
+
+			if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot,
+						shift)) {
+				*mask |= PGTBL_PMD_MODIFIED;
+				*nr += 1 << (shift - PAGE_SHIFT);
+				continue;
+			}
+		}
+
+		if (vmap_pages_pte_range(pmd, addr, next, prot, pages, nr, mask, shift))
 			return -ENOMEM;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
@@ -586,7 +616,7 @@ static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
 
 static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr,
 		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
-		pgtbl_mod_mask *mask)
+		pgtbl_mod_mask *mask, unsigned int shift)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -596,7 +626,7 @@ static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (vmap_pages_pmd_range(pud, addr, next, prot, pages, nr, mask))
+		if (vmap_pages_pmd_range(pud, addr, next, prot, pages, nr, mask, shift))
 			return -ENOMEM;
 	} while (pud++, addr = next, addr != end);
 	return 0;
@@ -604,7 +634,7 @@ static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr,
 
 static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
 		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
-		pgtbl_mod_mask *mask)
+		pgtbl_mod_mask *mask, unsigned int shift)
 {
 	p4d_t *p4d;
 	unsigned long next;
@@ -614,14 +644,14 @@ static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
 		return -ENOMEM;
 	do {
 		next = p4d_addr_end(addr, end);
-		if (vmap_pages_pud_range(p4d, addr, next, prot, pages, nr, mask))
+		if (vmap_pages_pud_range(p4d, addr, next, prot, pages, nr, mask, shift))
 			return -ENOMEM;
 	} while (p4d++, addr = next, addr != end);
 	return 0;
 }
 
 static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
-		pgprot_t prot, struct page **pages)
+		pgprot_t prot, struct page **pages, unsigned int shift)
 {
 	unsigned long start = addr;
 	pgd_t *pgd;
@@ -636,7 +666,7 @@ static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
 		next = pgd_addr_end(addr, end);
 		if (pgd_bad(*pgd))
 			mask |= PGTBL_PGD_MODIFIED;
-		err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
+		err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask, shift);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
@@ -665,7 +695,7 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
 
 	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
 			page_shift == PAGE_SHIFT)
-		return vmap_small_pages_range_noflush(addr, end, prot, pages);
+		return vmap_small_pages_range_noflush(addr, end, prot, pages, PAGE_SHIFT);
 
 	for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
 		int err;
-- 
2.39.3 (Apple Git-146)



  parent reply	other threads:[~2026-04-08  2:51 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-08  2:51 [RFC PATCH 0/8] mm/vmalloc: Speed up ioremap, vmalloc and vmap with contiguous memory Barry Song (Xiaomi)
2026-04-08  2:51 ` [RFC PATCH 1/8] arm64/hugetlb: Extend batching of multiple CONT_PTE in a single PTE setup Barry Song (Xiaomi)
2026-04-08 10:32   ` Dev Jain
2026-04-08 11:00     ` Barry Song
2026-04-08  2:51 ` [RFC PATCH 2/8] arm64/vmalloc: Allow arch_vmap_pte_range_map_size to batch multiple CONT_PTE Barry Song (Xiaomi)
2026-04-08  2:51 ` Barry Song (Xiaomi) [this message]
2026-04-08 11:08   ` [RFC PATCH 3/8] mm/vmalloc: Extend vmap_small_pages_range_noflush() to support larger page_shift sizes Dev Jain
2026-04-08 21:29     ` Barry Song
2026-04-13 16:08   ` Mike Rapoport
2026-04-13 20:16     ` Barry Song
2026-04-08  2:51 ` [RFC PATCH 4/8] mm/vmalloc: Eliminate page table zigzag for huge vmalloc mappings Barry Song (Xiaomi)
2026-04-13 16:16   ` Mike Rapoport
2026-04-13 19:49     ` Barry Song
2026-04-08  2:51 ` [RFC PATCH 5/8] mm/vmalloc: map contiguous pages in batches for vmap() if possible Barry Song (Xiaomi)
2026-04-08  4:19   ` Dev Jain
2026-04-08  5:12     ` Barry Song
2026-04-08 11:22       ` Dev Jain
2026-04-08 14:03   ` Dev Jain
2026-04-08 21:54     ` Barry Song
2026-04-09 10:10       ` Dev Jain
2026-04-09 10:20       ` Uladzislau Rezki
2026-04-10  1:02         ` Barry Song
2026-04-13 19:23           ` David Hildenbrand (Arm)
2026-04-13 19:56             ` Barry Song
2026-04-08  2:51 ` [RFC PATCH 6/8] mm/vmalloc: align vm_area so vmap() can batch mappings Barry Song (Xiaomi)
2026-04-08  2:51 ` [RFC PATCH 7/8] mm/vmalloc: Coalesce same page_shift mappings in vmap to avoid pgtable zigzag Barry Song (Xiaomi)
2026-04-08 11:36   ` Dev Jain
2026-04-08 21:58     ` Barry Song
2026-04-08  2:51 ` [RFC PATCH 8/8] mm/vmalloc: Stop scanning for compound pages after encountering small pages in vmap Barry Song (Xiaomi)
2026-04-08  9:14 ` [RFC PATCH 0/8] mm/vmalloc: Speed up ioremap, vmalloc and vmap with contiguous memory Dev Jain
2026-04-08 10:51   ` Barry Song
2026-04-08 10:55     ` Dev Jain
2026-04-27 15:04 ` Dev Jain
2026-04-28  3:16   ` Barry Song
2026-04-28  4:14     ` Dev Jain

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260408025115.27368-4-baohua@kernel.org \
    --to=baohua@kernel.org \
    --cc=Xueyuan.chen21@gmail.com \
    --cc=ajd@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=anshuman.khandual@arm.com \
    --cc=catalin.marinas@arm.com \
    --cc=david@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=rppt@kernel.org \
    --cc=ryan.roberts@arm.com \
    --cc=urezki@gmail.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.