* + ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch added to mm-new branch
@ 2025-10-16 21:07 Andrew Morton
2025-10-17 15:20 ` craftfever
0 siblings, 1 reply; 3+ messages in thread
From: Andrew Morton @ 2025-10-16 21:07 UTC (permalink / raw)
To: mm-commits, xu.xin16, david, craftfever, chengming.zhou,
pedrodemargomes, akpm
The patch titled
Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
has been added to the -mm mm-new branch. Its filename is
ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others to take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
Date: Wed, 15 Oct 2025 22:22:36 -0300
Currently, scan_get_next_rmap_item() walks every page address in a VMA to
locate mergeable pages. This becomes highly inefficient when scanning
large virtual memory areas that contain mostly unmapped regions.
This patch replaces the per-address lookup with a range walk using
walk_page_range(). The range walker allows KSM to skip over entire
unmapped holes in a VMA, avoiding unnecessary lookups. This problem was
previously discussed in [1].
[1] https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/
Link: https://lkml.kernel.org/r/20251016012236.4189-1-pedrodemargomes@gmail.com
Link: https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/ [1]
Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
Reported-by: craftfever <craftfever@airmail.cc>
Closes: https://lkml.kernel.org/r/020cf8de6e773bb78ba7614ef250129f11a63781@murena.io
Suggested-by: David Hildenbrand <david@redhat.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/ksm.c | 185 ++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 135 insertions(+), 50 deletions(-)
--- a/mm/ksm.c~ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item
+++ a/mm/ksm.c
@@ -2455,14 +2455,119 @@ static bool should_skip_rmap_item(struct
return true;
}
+struct ksm_walk_private {
+ struct page *page;
+ struct folio *folio;
+ struct vm_area_struct *vma;
+ unsigned long address;
+};
+
+static int ksm_walk_test(unsigned long addr, unsigned long next, struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+ struct ksm_walk_private *private;
+
+ if (!(vma->vm_flags & VM_MERGEABLE))
+ return 1;
+
+ private = (struct ksm_walk_private *) walk->private;
+ private->address = vma->vm_end;
+
+ if (!vma->anon_vma)
+ return 1;
+
+ return 0;
+}
+
+static int ksm_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct mm_struct *mm = walk->mm;
+ struct vm_area_struct *vma = walk->vma;
+ struct ksm_walk_private *private = (struct ksm_walk_private *) walk->private;
+ struct folio *folio;
+ pte_t *start_pte, *pte, ptent;
+ pmd_t pmde;
+ struct page *page;
+ spinlock_t *ptl;
+ int ret = 0;
+
+ if (ksm_test_exit(mm))
+ return 1;
+
+ ptl = pmd_lock(mm, pmd);
+ pmde = pmdp_get(pmd);
+
+ if (!pmd_present(pmde))
+ goto pmd_out;
+
+ if (!pmd_trans_huge(pmde))
+ goto pte_table;
+
+ page = vm_normal_page_pmd(vma, addr, pmde);
+
+ if (!page)
+ goto pmd_out;
+
+ folio = page_folio(page);
+ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
+ goto pmd_out;
+
+ ret = 1;
+ folio_get(folio);
+ private->page = page + ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT);
+ private->folio = folio;
+ private->vma = vma;
+ private->address = addr;
+pmd_out:
+ spin_unlock(ptl);
+ return ret;
+
+pte_table:
+ spin_unlock(ptl);
+
+ start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ if (!start_pte)
+ return 0;
+
+ for (; addr < end; pte++, addr += PAGE_SIZE) {
+ ptent = ptep_get(pte);
+ page = vm_normal_page(vma, addr, ptent);
+
+ if (!page)
+ continue;
+
+ folio = page_folio(page);
+ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
+ continue;
+
+ ret = 1;
+ folio_get(folio);
+ private->page = page;
+ private->folio = folio;
+ private->vma = vma;
+ private->address = addr;
+ break;
+ }
+ pte_unmap_unlock(start_pte, ptl);
+
+ cond_resched();
+ return ret;
+}
+
+struct mm_walk_ops walk_ops = {
+ .pmd_entry = ksm_pmd_entry,
+ .test_walk = ksm_walk_test,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
{
struct mm_struct *mm;
struct ksm_mm_slot *mm_slot;
struct mm_slot *slot;
- struct vm_area_struct *vma;
struct ksm_rmap_item *rmap_item;
- struct vma_iterator vmi;
+ struct ksm_walk_private walk_private;
int nid;
if (list_empty(&ksm_mm_head.slot.mm_node))
@@ -2527,64 +2632,44 @@ next_mm:
slot = &mm_slot->slot;
mm = slot->mm;
- vma_iter_init(&vmi, mm, ksm_scan.address);
mmap_read_lock(mm);
if (ksm_test_exit(mm))
goto no_vmas;
- for_each_vma(vmi, vma) {
- if (!(vma->vm_flags & VM_MERGEABLE))
- continue;
- if (ksm_scan.address < vma->vm_start)
- ksm_scan.address = vma->vm_start;
- if (!vma->anon_vma)
- ksm_scan.address = vma->vm_end;
-
- while (ksm_scan.address < vma->vm_end) {
- struct page *tmp_page = NULL;
- struct folio_walk fw;
- struct folio *folio;
+ while (true) {
+ struct folio *folio;
- if (ksm_test_exit(mm))
- break;
+ walk_private.page = NULL;
+ walk_private.folio = NULL;
+ walk_private.address = ksm_scan.address;
+
+ walk_page_range(mm, ksm_scan.address, -1, &walk_ops, (void *) &walk_private);
+ ksm_scan.address = walk_private.address;
+ if (!walk_private.page)
+ break;
+
+ folio = walk_private.folio;
+ flush_anon_page(walk_private.vma, walk_private.page, ksm_scan.address);
+ flush_dcache_page(walk_private.page);
+ rmap_item = get_next_rmap_item(mm_slot,
+ ksm_scan.rmap_list, ksm_scan.address);
+ if (rmap_item) {
+ ksm_scan.rmap_list =
+ &rmap_item->rmap_list;
- folio = folio_walk_start(&fw, vma, ksm_scan.address, 0);
- if (folio) {
- if (!folio_is_zone_device(folio) &&
- folio_test_anon(folio)) {
- folio_get(folio);
- tmp_page = fw.page;
- }
- folio_walk_end(&fw, vma);
+ ksm_scan.address += PAGE_SIZE;
+ if (should_skip_rmap_item(folio, rmap_item)) {
+ folio_put(folio);
+ continue;
}
- if (tmp_page) {
- flush_anon_page(vma, tmp_page, ksm_scan.address);
- flush_dcache_page(tmp_page);
- rmap_item = get_next_rmap_item(mm_slot,
- ksm_scan.rmap_list, ksm_scan.address);
- if (rmap_item) {
- ksm_scan.rmap_list =
- &rmap_item->rmap_list;
-
- if (should_skip_rmap_item(folio, rmap_item)) {
- folio_put(folio);
- goto next_page;
- }
-
- ksm_scan.address += PAGE_SIZE;
- *page = tmp_page;
- } else {
- folio_put(folio);
- }
- mmap_read_unlock(mm);
- return rmap_item;
- }
-next_page:
- ksm_scan.address += PAGE_SIZE;
- cond_resched();
+ *page = walk_private.page;
+ } else {
+ folio_put(folio);
}
+ mmap_read_unlock(mm);
+ return rmap_item;
}
if (ksm_test_exit(mm)) {
_
Patches currently in -mm which might be from pedrodemargomes@gmail.com are
ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: + ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch added to mm-new branch
2025-10-16 21:07 + ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch added to mm-new branch Andrew Morton
@ 2025-10-17 15:20 ` craftfever
0 siblings, 0 replies; 3+ messages in thread
From: craftfever @ 2025-10-17 15:20 UTC (permalink / raw)
To: Andrew Morton, mm-commits, xu.xin16, david, chengming.zhou,
pedrodemargomes
Andrew Morton wrote:
> The patch titled
> Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
> has been added to the -mm mm-new branch. Its filename is
> ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
>
> This patch will shortly appear at
> https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
>
> This patch will later appear in the mm-new branch at
> git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>
> Note, mm-new is a provisional staging ground for work-in-progress
> patches, and acceptance into mm-new is a notification for others to take
> notice and to finish up reviews. Please do not hesitate to respond to
> review feedback and post updated versions to replace or incrementally
> fixup patches in mm-new.
>
> Before you just go and hit "reply", please:
> a) Consider who else should be cc'ed
> b) Prefer to cc a suitable mailing list as well
> c) Ideally: find the original patch on the mailing list and do a
> reply-to-all to that, adding suitable additional cc's
>
> *** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
>
> The -mm tree is included into linux-next via the mm-everything
> branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
> and is updated there every 2-3 working days
>
> ------------------------------------------------------
> From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
> Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
> Date: Wed, 15 Oct 2025 22:22:36 -0300
>
> Currently, scan_get_next_rmap_item() walks every page address in a VMA to
> locate mergeable pages. This becomes highly inefficient when scanning
> large virtual memory areas that contain mostly unmapped regions.
>
> This patch replaces the per-address lookup with a range walk using
> walk_page_range(). The range walker allows KSM to skip over entire
> unmapped holes in a VMA, avoiding unnecessary lookups. This problem was
> previously discussed in [1].
>
> [1] https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/
>
> Link: https://lkml.kernel.org/r/20251016012236.4189-1-pedrodemargomes@gmail.com
> Link: https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/ [1]
> Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
> Reported-by: craftfever <craftfever@airmail.cc>
> Closes: https://lkml.kernel.org/r/020cf8de6e773bb78ba7614ef250129f11a63781@murena.io
> Suggested-by: David Hildenbrand <david@redhat.com>
> Cc: Chengming Zhou <chengming.zhou@linux.dev>
> Cc: xu xin <xu.xin16@zte.com.cn>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> ---
>
> mm/ksm.c | 185 ++++++++++++++++++++++++++++++++++++++---------------
> 1 file changed, 135 insertions(+), 50 deletions(-)
>
> --- a/mm/ksm.c~ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item
> +++ a/mm/ksm.c
> @@ -2455,14 +2455,119 @@ static bool should_skip_rmap_item(struct
> return true;
> }
>
> +struct ksm_walk_private {
> + struct page *page;
> + struct folio *folio;
> + struct vm_area_struct *vma;
> + unsigned long address;
> +};
> +
> +static int ksm_walk_test(unsigned long addr, unsigned long next, struct mm_walk *walk)
> +{
> + struct vm_area_struct *vma = walk->vma;
> + struct ksm_walk_private *private;
> +
> + if (!(vma->vm_flags & VM_MERGEABLE))
> + return 1;
> +
> + private = (struct ksm_walk_private *) walk->private;
> + private->address = vma->vm_end;
> +
> + if (!vma->anon_vma)
> + return 1;
> +
> + return 0;
> +}
> +
> +static int ksm_pmd_entry(pmd_t *pmd, unsigned long addr,
> + unsigned long end, struct mm_walk *walk)
> +{
> + struct mm_struct *mm = walk->mm;
> + struct vm_area_struct *vma = walk->vma;
> + struct ksm_walk_private *private = (struct ksm_walk_private *) walk->private;
> + struct folio *folio;
> + pte_t *start_pte, *pte, ptent;
> + pmd_t pmde;
> + struct page *page;
> + spinlock_t *ptl;
> + int ret = 0;
> +
> + if (ksm_test_exit(mm))
> + return 1;
> +
> + ptl = pmd_lock(mm, pmd);
> + pmde = pmdp_get(pmd);
> +
> + if (!pmd_present(pmde))
> + goto pmd_out;
> +
> + if (!pmd_trans_huge(pmde))
> + goto pte_table;
> +
> + page = vm_normal_page_pmd(vma, addr, pmde);
> +
> + if (!page)
> + goto pmd_out;
> +
> + folio = page_folio(page);
> + if (folio_is_zone_device(folio) || !folio_test_anon(folio))
> + goto pmd_out;
> +
> + ret = 1;
> + folio_get(folio);
> + private->page = page + ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT);
> + private->folio = folio;
> + private->vma = vma;
> + private->address = addr;
> +pmd_out:
> + spin_unlock(ptl);
> + return ret;
> +
> +pte_table:
> + spin_unlock(ptl);
> +
> + start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
> + if (!start_pte)
> + return 0;
> +
> + for (; addr < end; pte++, addr += PAGE_SIZE) {
> + ptent = ptep_get(pte);
> + page = vm_normal_page(vma, addr, ptent);
> +
> + if (!page)
> + continue;
> +
> + folio = page_folio(page);
> + if (folio_is_zone_device(folio) || !folio_test_anon(folio))
> + continue;
> +
> + ret = 1;
> + folio_get(folio);
> + private->page = page;
> + private->folio = folio;
> + private->vma = vma;
> + private->address = addr;
> + break;
> + }
> + pte_unmap_unlock(start_pte, ptl);
> +
> + cond_resched();
> + return ret;
> +}
> +
> +struct mm_walk_ops walk_ops = {
> + .pmd_entry = ksm_pmd_entry,
> + .test_walk = ksm_walk_test,
> + .walk_lock = PGWALK_RDLOCK,
> +};
> +
> static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
> {
> struct mm_struct *mm;
> struct ksm_mm_slot *mm_slot;
> struct mm_slot *slot;
> - struct vm_area_struct *vma;
> struct ksm_rmap_item *rmap_item;
> - struct vma_iterator vmi;
> + struct ksm_walk_private walk_private;
> int nid;
>
> if (list_empty(&ksm_mm_head.slot.mm_node))
> @@ -2527,64 +2632,44 @@ next_mm:
>
> slot = &mm_slot->slot;
> mm = slot->mm;
> - vma_iter_init(&vmi, mm, ksm_scan.address);
>
> mmap_read_lock(mm);
> if (ksm_test_exit(mm))
> goto no_vmas;
>
> - for_each_vma(vmi, vma) {
> - if (!(vma->vm_flags & VM_MERGEABLE))
> - continue;
> - if (ksm_scan.address < vma->vm_start)
> - ksm_scan.address = vma->vm_start;
> - if (!vma->anon_vma)
> - ksm_scan.address = vma->vm_end;
> -
> - while (ksm_scan.address < vma->vm_end) {
> - struct page *tmp_page = NULL;
> - struct folio_walk fw;
> - struct folio *folio;
> + while (true) {
> + struct folio *folio;
>
> - if (ksm_test_exit(mm))
> - break;
> + walk_private.page = NULL;
> + walk_private.folio = NULL;
> + walk_private.address = ksm_scan.address;
> +
> + walk_page_range(mm, ksm_scan.address, -1, &walk_ops, (void *) &walk_private);
> + ksm_scan.address = walk_private.address;
> + if (!walk_private.page)
> + break;
> +
> + folio = walk_private.folio;
> + flush_anon_page(walk_private.vma, walk_private.page, ksm_scan.address);
> + flush_dcache_page(walk_private.page);
> + rmap_item = get_next_rmap_item(mm_slot,
> + ksm_scan.rmap_list, ksm_scan.address);
> + if (rmap_item) {
> + ksm_scan.rmap_list =
> + &rmap_item->rmap_list;
>
> - folio = folio_walk_start(&fw, vma, ksm_scan.address, 0);
> - if (folio) {
> - if (!folio_is_zone_device(folio) &&
> - folio_test_anon(folio)) {
> - folio_get(folio);
> - tmp_page = fw.page;
> - }
> - folio_walk_end(&fw, vma);
> + ksm_scan.address += PAGE_SIZE;
> + if (should_skip_rmap_item(folio, rmap_item)) {
> + folio_put(folio);
> + continue;
> }
>
> - if (tmp_page) {
> - flush_anon_page(vma, tmp_page, ksm_scan.address);
> - flush_dcache_page(tmp_page);
> - rmap_item = get_next_rmap_item(mm_slot,
> - ksm_scan.rmap_list, ksm_scan.address);
> - if (rmap_item) {
> - ksm_scan.rmap_list =
> - &rmap_item->rmap_list;
> -
> - if (should_skip_rmap_item(folio, rmap_item)) {
> - folio_put(folio);
> - goto next_page;
> - }
> -
> - ksm_scan.address += PAGE_SIZE;
> - *page = tmp_page;
> - } else {
> - folio_put(folio);
> - }
> - mmap_read_unlock(mm);
> - return rmap_item;
> - }
> -next_page:
> - ksm_scan.address += PAGE_SIZE;
> - cond_resched();
> + *page = walk_private.page;
> + } else {
> + folio_put(folio);
> }
> + mmap_read_unlock(mm);
> + return rmap_item;
> }
>
> if (ksm_test_exit(mm)) {
> _
>
> Patches currently in -mm which might be from pedrodemargomes@gmail.com are
>
> ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
>
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
Andrew, please update that patch with the recent fix from Pedro that adds
the pte_present() check; otherwise ksmd will crash and page faults will
occur system-wide. This is the new version of the patch from him (I
already sent it to you, but I am duplicating it here):
diff --git a/mm/ksm.c b/mm/ksm.c
index 3aed0478fdce..c8a0a986ccc3 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2455,14 +2455,123 @@ static bool should_skip_rmap_item(struct folio
*folio,
return true;
}
+struct ksm_walk_private {
+ struct page *page;
+ struct folio *folio;
+ struct vm_area_struct *vma;
+ unsigned long address;
+};
+
+static int ksm_walk_test(unsigned long addr, unsigned long next, struct
mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+ struct ksm_walk_private *private;
+
+ if (!(vma->vm_flags & VM_MERGEABLE))
+ return 1;
+
+ private = (struct ksm_walk_private *) walk->private;
+ private->address = vma->vm_end;
+
+ if (!vma->anon_vma)
+ return 1;
+
+ return 0;
+}
+
+static int ksm_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct mm_struct *mm = walk->mm;
+ struct vm_area_struct *vma = walk->vma;
+ struct ksm_walk_private *private = (struct ksm_walk_private *)
walk->private;
+ struct folio *folio;
+ pte_t *start_pte, *pte, ptent;
+ pmd_t pmde;
+ struct page *page;
+ spinlock_t *ptl;
+ int ret = 0;
+
+ if (ksm_test_exit(mm))
+ return 1;
+
+ ptl = pmd_lock(mm, pmd);
+ pmde = pmdp_get(pmd);
+
+ if (!pmd_present(pmde))
+ goto pmd_out;
+
+ if (!pmd_trans_huge(pmde))
+ goto pte_table;
+
+ page = vm_normal_page_pmd(vma, addr, pmde);
+
+ if (!page)
+ goto pmd_out;
+
+ folio = page_folio(page);
+ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
+ goto pmd_out;
+
+ ret = 1;
+ folio_get(folio);
+ private->page = page + ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT);
+ private->folio = folio;
+ private->vma = vma;
+ private->address = addr;
+pmd_out:
+ spin_unlock(ptl);
+ return ret;
+
+pte_table:
+ spin_unlock(ptl);
+
+ start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ if (!start_pte)
+ return 0;
+
+ for (; addr < end; pte++, addr += PAGE_SIZE) {
+ ptent = ptep_get(pte);
+
+ if (!pte_present(ptent))
+ continue;
+
+ page = vm_normal_page(vma, addr, ptent);
+
+ if (!page)
+ continue;
+
+ folio = page_folio(page);
+ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
+ continue;
+
+ ret = 1;
+ folio_get(folio);
+ private->page = page;
+ private->folio = folio;
+ private->vma = vma;
+ private->address = addr;
+ break;
+ }
+ pte_unmap_unlock(start_pte, ptl);
+
+ cond_resched();
+ return ret;
+}
+
+struct mm_walk_ops walk_ops = {
+ .pmd_entry = ksm_pmd_entry,
+ .test_walk = ksm_walk_test,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
{
struct mm_struct *mm;
struct ksm_mm_slot *mm_slot;
struct mm_slot *slot;
- struct vm_area_struct *vma;
struct ksm_rmap_item *rmap_item;
- struct vma_iterator vmi;
+ struct ksm_walk_private walk_private;
int nid;
if (list_empty(&ksm_mm_head.slot.mm_node))
@@ -2527,64 +2636,44 @@ static struct ksm_rmap_item
*scan_get_next_rmap_item(struct page **page)
slot = &mm_slot->slot;
mm = slot->mm;
- vma_iter_init(&vmi, mm, ksm_scan.address);
mmap_read_lock(mm);
if (ksm_test_exit(mm))
goto no_vmas;
- for_each_vma(vmi, vma) {
- if (!(vma->vm_flags & VM_MERGEABLE))
- continue;
- if (ksm_scan.address < vma->vm_start)
- ksm_scan.address = vma->vm_start;
- if (!vma->anon_vma)
- ksm_scan.address = vma->vm_end;
-
- while (ksm_scan.address < vma->vm_end) {
- struct page *tmp_page = NULL;
- struct folio_walk fw;
- struct folio *folio;
+ while (true) {
+ struct folio *folio;
- if (ksm_test_exit(mm))
- break;
+ walk_private.page = NULL;
+ walk_private.folio = NULL;
+ walk_private.address = ksm_scan.address;
- folio = folio_walk_start(&fw, vma, ksm_scan.address, 0);
- if (folio) {
- if (!folio_is_zone_device(folio) &&
- folio_test_anon(folio)) {
- folio_get(folio);
- tmp_page = fw.page;
- }
- folio_walk_end(&fw, vma);
- }
+ walk_page_range(mm, ksm_scan.address, -1, &walk_ops, (void *)
&walk_private);
+ ksm_scan.address = walk_private.address;
+ if (!walk_private.page)
+ break;
+
+ folio = walk_private.folio;
+ flush_anon_page(walk_private.vma, walk_private.page, ksm_scan.address);
+ flush_dcache_page(walk_private.page);
+ rmap_item = get_next_rmap_item(mm_slot,
+ ksm_scan.rmap_list, ksm_scan.address);
+ if (rmap_item) {
+ ksm_scan.rmap_list =
+ &rmap_item->rmap_list;
- if (tmp_page) {
- flush_anon_page(vma, tmp_page, ksm_scan.address);
- flush_dcache_page(tmp_page);
- rmap_item = get_next_rmap_item(mm_slot,
- ksm_scan.rmap_list, ksm_scan.address);
- if (rmap_item) {
- ksm_scan.rmap_list =
- &rmap_item->rmap_list;
-
- if (should_skip_rmap_item(folio, rmap_item)) {
- folio_put(folio);
- goto next_page;
- }
-
- ksm_scan.address += PAGE_SIZE;
- *page = tmp_page;
- } else {
- folio_put(folio);
- }
- mmap_read_unlock(mm);
- return rmap_item;
- }
-next_page:
ksm_scan.address += PAGE_SIZE;
- cond_resched();
+ if (should_skip_rmap_item(folio, rmap_item)) {
+ folio_put(folio);
+ continue;
+ }
+
+ *page = walk_private.page;
+ } else {
+ folio_put(folio);
}
+ mmap_read_unlock(mm);
+ return rmap_item;
}
if (ksm_test_exit(mm)) {
-----------------
The difference is between lines 112 and 113 of your patch: after
ptent = ptep_get(pte);
there is a new piece of code (a check for pte_present):
if (!pte_present(ptent))
continue;
(The full patch is included above for convenience.)
Please make sure to post the fixed patch, which works correctly
(otherwise crashes will occur). I noticed that the file has not been
updated. Thanks.
^ permalink raw reply related [flat|nested] 3+ messages in thread
* + ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch added to mm-new branch
@ 2025-10-22 20:31 Andrew Morton
0 siblings, 0 replies; 3+ messages in thread
From: Andrew Morton @ 2025-10-22 20:31 UTC (permalink / raw)
To: mm-commits, xu.xin16, david, chengming.zhou, pedrodemargomes,
akpm
The patch titled
Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
has been added to the -mm mm-new branch. Its filename is
ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
This patch will later appear in the mm-new branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Note, mm-new is a provisional staging ground for work-in-progress
patches, and acceptance into mm-new is a notification for others to take
notice and to finish up reviews. Please do not hesitate to respond to
review feedback and post updated versions to replace or incrementally
fixup patches in mm-new.
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
Subject: ksm: use range-walk function to jump over holes in scan_get_next_rmap_item
Date: Wed, 22 Oct 2025 12:30:59 -0300
Currently, scan_get_next_rmap_item() walks every page address in a VMA to
locate mergeable pages. This becomes highly inefficient when scanning
large virtual memory areas that contain mostly unmapped regions.
This patch replaces the per-address lookup with a range walk using
walk_page_range(). The range walker allows KSM to skip over entire
unmapped holes in a VMA, avoiding unnecessary lookups. This problem was
previously discussed in [1].
Link: https://lkml.kernel.org/r/20251022153059.22763-1-pedrodemargomes@gmail.com
Link: https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/ [1]
Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@gmail.com>
Cc: Chengming Zhou <chengming.zhou@linux.dev>
Cc: David Hildenbrand <david@redhat.com>
Cc: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/ksm.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 104 insertions(+), 9 deletions(-)
--- a/mm/ksm.c~ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item
+++ a/mm/ksm.c
@@ -2455,6 +2455,95 @@ static bool should_skip_rmap_item(struct
return true;
}
+struct ksm_next_page_arg {
+ struct folio *folio;
+ struct page *page;
+ unsigned long addr;
+};
+
+static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct ksm_next_page_arg *private = walk->private;
+ struct vm_area_struct *vma = walk->vma;
+ pte_t *start_ptep = NULL, *ptep, pte;
+ struct mm_struct *mm = walk->mm;
+ struct folio *folio;
+ struct page *page;
+ spinlock_t *ptl;
+ pmd_t pmd;
+
+ if (ksm_test_exit(mm))
+ return 0;
+
+ cond_resched();
+
+ pmd = pmdp_get_lockless(pmdp);
+ if (!pmd_present(pmd))
+ return 0;
+
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) {
+ ptl = pmd_lock(mm, pmdp);
+ pmd = pmdp_get(pmdp);
+
+ if (!pmd_present(pmd)) {
+ goto not_found_unlock;
+ } else if (pmd_leaf(pmd)) {
+ page = vm_normal_page_pmd(vma, addr, pmd);
+ if (!page)
+ goto not_found_unlock;
+ folio = page_folio(page);
+
+ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
+ goto not_found_unlock;
+
+ page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT);
+ goto found_unlock;
+ }
+ spin_unlock(ptl);
+ }
+
+ start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+ if (!start_ptep)
+ return 0;
+
+ for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
+ pte = ptep_get(ptep);
+
+ if (!pte_present(pte))
+ continue;
+
+ page = vm_normal_page(vma, addr, pte);
+ if (!page)
+ continue;
+ folio = page_folio(page);
+
+ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
+ continue;
+ goto found_unlock;
+ }
+
+not_found_unlock:
+ spin_unlock(ptl);
+ if (start_ptep)
+ pte_unmap(start_ptep);
+ return 0;
+found_unlock:
+ folio_get(folio);
+ spin_unlock(ptl);
+ if (start_ptep)
+ pte_unmap(start_ptep);
+ private->page = page;
+ private->folio = folio;
+ private->addr = addr;
+ return 1;
+}
+
+static struct mm_walk_ops ksm_next_page_ops = {
+ .pmd_entry = ksm_next_page_pmd_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
{
struct mm_struct *mm;
@@ -2542,21 +2631,27 @@ next_mm:
ksm_scan.address = vma->vm_end;
while (ksm_scan.address < vma->vm_end) {
+ struct ksm_next_page_arg ksm_next_page_arg;
struct page *tmp_page = NULL;
- struct folio_walk fw;
struct folio *folio;
if (ksm_test_exit(mm))
break;
- folio = folio_walk_start(&fw, vma, ksm_scan.address, 0);
- if (folio) {
- if (!folio_is_zone_device(folio) &&
- folio_test_anon(folio)) {
- folio_get(folio);
- tmp_page = fw.page;
- }
- folio_walk_end(&fw, vma);
+ int found;
+
+ found = walk_page_range_vma(vma, ksm_scan.address,
+ vma->vm_end,
+ &ksm_next_page_ops,
+ &ksm_next_page_arg);
+
+ if (found > 0) {
+ folio = ksm_next_page_arg.folio;
+ tmp_page = ksm_next_page_arg.page;
+ ksm_scan.address = ksm_next_page_arg.addr;
+ } else {
+ VM_WARN_ON_ONCE(found < 0);
+ ksm_scan.address = vma->vm_end - PAGE_SIZE;
}
if (tmp_page) {
_
Patches currently in -mm which might be from pedrodemargomes@gmail.com are
ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2025-10-22 20:31 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-10-16 21:07 + ksm-use-range-walk-function-to-jump-over-holes-in-scan_get_next_rmap_item.patch added to mm-new branch Andrew Morton
2025-10-17 15:20 ` craftfever
-- strict thread matches above, loose matches on Subject: below --
2025-10-22 20:31 Andrew Morton
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.