* [PATCH] mm/damon/vaddr: attempt per-vma lock during page table walk
@ 2026-05-11 13:25 Kefeng Wang
2026-05-12 1:31 ` SeongJae Park
2026-05-12 5:26 ` sashiko-bot
0 siblings, 2 replies; 5+ messages in thread
From: Kefeng Wang @ 2026-05-11 13:25 UTC (permalink / raw)
To: SeongJae Park, Andrew Morton; +Cc: damon, linux-mm, sunnanyong, Kefeng Wang
Currently, DAMON virtual address operations use mmap_read_lock
during page table walks, which can cause unnecessary contention
under high concurrency.
Introduce damon_va_walk_page_range() to first attempt acquiring a
per-vma lock. If the VMA is found and the range is fully contained
within it, the page table walk proceeds with the per-vma lock
instead of mmap_read_lock.
This optimization is particularly effective for damon_va_young()
and damon_va_mkold(), which are frequently called and typically
operate within a single VMA.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
mm/damon/vaddr.c | 66 +++++++++++++++++++++++++++++-------------------
1 file changed, 40 insertions(+), 26 deletions(-)
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index dd5f2d7027ac..cd6c0c5f3655 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -237,6 +237,32 @@ static void damon_va_update(struct damon_ctx *ctx)
 	}
 }
 
+static void damon_va_walk_page_range(struct mm_struct *mm, unsigned long start,
+		unsigned long end, struct mm_walk_ops *ops, void *private)
+{
+	struct vm_area_struct *vma;
+
+	vma = lock_vma_under_rcu(mm, start);
+	if (!vma)
+		goto lock_mmap;
+
+	if (end > vma->vm_end) {
+		vma_end_read(vma);
+		goto lock_mmap;
+	}
+
+	ops->walk_lock = PGWALK_VMA_RDLOCK_VERIFY;
+	walk_page_range_vma(vma, start, end, ops, private);
+	vma_end_read(vma);
+	return;
+
+lock_mmap:
+	mmap_read_lock(mm);
+	ops->walk_lock = PGWALK_RDLOCK;
+	walk_page_range(mm, start, end, ops, private);
+	mmap_read_unlock(mm);
+}
+
 static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr,
 		unsigned long next, struct mm_walk *walk)
 {
@@ -315,17 +341,14 @@ static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
 #define damon_mkold_hugetlb_entry NULL
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static const struct mm_walk_ops damon_mkold_ops = {
-	.pmd_entry = damon_mkold_pmd_entry,
-	.hugetlb_entry = damon_mkold_hugetlb_entry,
-	.walk_lock = PGWALK_RDLOCK,
-};
-
 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
 {
-	mmap_read_lock(mm);
-	walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
-	mmap_read_unlock(mm);
+	struct mm_walk_ops damon_mkold_ops = {
+		.pmd_entry = damon_mkold_pmd_entry,
+		.hugetlb_entry = damon_mkold_hugetlb_entry,
+	};
+
+	damon_va_walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL);
 }
 
 /*
@@ -444,12 +467,6 @@ static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
 #define damon_young_hugetlb_entry NULL
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static const struct mm_walk_ops damon_young_ops = {
-	.pmd_entry = damon_young_pmd_entry,
-	.hugetlb_entry = damon_young_hugetlb_entry,
-	.walk_lock = PGWALK_RDLOCK,
-};
-
 static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
 		unsigned long *folio_sz)
 {
@@ -458,9 +475,12 @@ static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
 		.young = false,
 	};
 
-	mmap_read_lock(mm);
-	walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
-	mmap_read_unlock(mm);
+	struct mm_walk_ops damon_young_ops = {
+		.pmd_entry = damon_young_pmd_entry,
+		.hugetlb_entry = damon_young_hugetlb_entry,
+	};
+
+	damon_va_walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg);
 	return arg.young;
 }
@@ -749,7 +769,6 @@ static unsigned long damos_va_migrate(struct damon_target *target,
 	struct mm_walk_ops walk_ops = {
 		.pmd_entry = damos_va_migrate_pmd_entry,
 		.pte_entry = NULL,
-		.walk_lock = PGWALK_RDLOCK,
 	};
 
 	use_target_nid = dests->nr_dests == 0;
@@ -767,9 +786,7 @@ static unsigned long damos_va_migrate(struct damon_target *target,
 	if (!mm)
 		goto free_lists;
 
-	mmap_read_lock(mm);
-	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
-	mmap_read_unlock(mm);
+	damon_va_walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
 	mmput(mm);
 
 	for (int i = 0; i < nr_dests; i++) {
@@ -861,7 +878,6 @@ static unsigned long damos_va_stat(struct damon_target *target,
 	struct mm_struct *mm;
 	struct mm_walk_ops walk_ops = {
 		.pmd_entry = damos_va_stat_pmd_entry,
-		.walk_lock = PGWALK_RDLOCK,
 	};
 
 	priv.scheme = s;
@@ -874,9 +890,7 @@ static unsigned long damos_va_stat(struct damon_target *target,
 	if (!mm)
 		return 0;
 
-	mmap_read_lock(mm);
-	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
-	mmap_read_unlock(mm);
+	damon_va_walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
 	mmput(mm);
 	return 0;
 }
--
2.27.0
* Re: [PATCH] mm/damon/vaddr: attempt per-vma lock during page table walk
2026-05-11 13:25 [PATCH] mm/damon/vaddr: attempt per-vma lock during page table walk Kefeng Wang
@ 2026-05-12 1:31 ` SeongJae Park
2026-05-12 13:59 ` Kefeng Wang
2026-05-12 5:26 ` sashiko-bot
1 sibling, 1 reply; 5+ messages in thread
From: SeongJae Park @ 2026-05-12 1:31 UTC (permalink / raw)
To: Kefeng Wang; +Cc: SeongJae Park, Andrew Morton, damon, linux-mm, sunnanyong
On Mon, 11 May 2026 21:25:46 +0800 Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> Currently, DAMON virtual address operations use mmap_read_lock
> during page table walks, which can cause unnecessary contention
> under high concurrency.
>
> Introduce damon_va_walk_page_range() to first attempt acquiring a
> per-vma lock. If the VMA is found and the range is fully contained
> within it, the page table walk proceeds with the per-vma lock
> instead of mmap_read_lock.
>
> This optimization is particularly effective for damon_va_young()
> and damon_va_mkold(), which are frequently called and typically
> operate within a single VMA.
Makes sense. Do you have some measurements?
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Looks good to me. Nonetheless, because I'm not familiar with per-vma locking,
I'd like to wait for Sashiko review.
Thanks,
SJ
[...]
* Re: [PATCH] mm/damon/vaddr: attempt per-vma lock during page table walk
2026-05-11 13:25 [PATCH] mm/damon/vaddr: attempt per-vma lock during page table walk Kefeng Wang
2026-05-12 1:31 ` SeongJae Park
@ 2026-05-12 5:26 ` sashiko-bot
1 sibling, 0 replies; 5+ messages in thread
From: sashiko-bot @ 2026-05-12 5:26 UTC (permalink / raw)
To: Kefeng Wang; +Cc: damon
Thank you for your contribution! Sashiko AI review found 1 potential issue to consider:
- [Medium] Inconsistent VMA Filtering (VM_PFNMAP) in damon_va_walk_page_range()
--
commit b5bde92b4c2b28a90660c48f50de91cd04010328
Author: Kefeng Wang <wangkefeng.wang@huawei.com>
mm/damon/vaddr: attempt per-vma lock during page table walk
This commit introduces damon_va_walk_page_range() to optimize DAMON's
virtual address page table walks. It attempts to acquire a per-VMA lock
and proceeds with walk_page_range_vma() if the range is contained within
a single VMA, falling back to the mmap_read_lock and walk_page_range()
otherwise.
> diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
> index 1b0ebe3b6951e..eed2ae132a589 100644
> --- a/mm/damon/vaddr.c
> +++ b/mm/damon/vaddr.c
> @@ -237,6 +237,32 @@ static void damon_va_update(struct damon_ctx *ctx)
>  	}
>  }
> 
> +static void damon_va_walk_page_range(struct mm_struct *mm, unsigned long start,
> +		unsigned long end, struct mm_walk_ops *ops, void *private)
> +{
> +	struct vm_area_struct *vma;
> +
> +	vma = lock_vma_under_rcu(mm, start);
> +	if (!vma)
> +		goto lock_mmap;
> +
> +	if (end > vma->vm_end) {
> +		vma_end_read(vma);
> +		goto lock_mmap;
> +	}
> +
> +	ops->walk_lock = PGWALK_VMA_RDLOCK_VERIFY;
> +	walk_page_range_vma(vma, start, end, ops, private);
> +	vma_end_read(vma);
> +	return;
> +
> +lock_mmap:
> +	mmap_read_lock(mm);
> +	ops->walk_lock = PGWALK_RDLOCK;
> +	walk_page_range(mm, start, end, ops, private);
> +	mmap_read_unlock(mm);
> +}
Does this code handle VMAs with the VM_PFNMAP flag inconsistently?
In the fallback path using walk_page_range(), the page walker calls
walk_page_test(), which skips VM_PFNMAP VMAs by default since the DAMON
operations structures do not provide a test_walk callback.
However, the fast path uses walk_page_range_vma(), which bypasses
walk_page_test() entirely and directly walks the page range.
This means that if a queried address range is fully contained within a single
VM_PFNMAP VMA, the page tables will be walked via the fast path. But if the
same VMA is part of a larger range, the mmap lock fallback is triggered and
the VMA is properly skipped.
Can this lead to DAMON improperly modifying PTEs on raw PFN device mappings,
such as clearing the young bit in damon_mkold_pmd_entry(), when taking the
fast path?
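For reference, the default filtering happens in walk_page_test() in
mm/pagewalk.c. A paraphrased sketch of its logic (comments added here;
see the upstream source for the exact code):

static int walk_page_test(unsigned long start, unsigned long end,
			  struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	const struct mm_walk_ops *ops = walk->ops;

	/* An explicit test_walk callback overrides the default policy. */
	if (ops->test_walk)
		return ops->test_walk(start, end, walk);

	/*
	 * A VM_PFNMAP range has no struct pages behind it, so the
	 * default is to skip the VMA, reporting it as a hole if a
	 * pte_hole callback is installed.
	 */
	if (vma->vm_flags & VM_PFNMAP) {
		int err = 1;

		if (ops->pte_hole)
			err = ops->pte_hole(start, end, -1, walk);
		return err ? err : 1;	/* positive return: skip this VMA */
	}
	return 0;
}

walk_page_range_vma() never calls this test, which is why the fast path
above walks VM_PFNMAP mappings that the fallback path would skip.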
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260511132546.1973270-1-wangkefeng.wang@huawei.com?part=1
* Re: [PATCH] mm/damon/vaddr: attempt per-vma lock during page table walk
2026-05-12 1:31 ` SeongJae Park
@ 2026-05-12 13:59 ` Kefeng Wang
2026-05-12 14:08 ` SeongJae Park
0 siblings, 1 reply; 5+ messages in thread
From: Kefeng Wang @ 2026-05-12 13:59 UTC (permalink / raw)
To: SeongJae Park; +Cc: Andrew Morton, damon, linux-mm, sunnanyong
On 5/12/2026 9:31 AM, SeongJae Park wrote:
> On Mon, 11 May 2026 21:25:46 +0800 Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>> Currently, DAMON virtual address operations use mmap_read_lock
>> during page table walks, which can cause unnecessary contention
>> under high concurrency.
>>
>> Introduce damon_va_walk_page_range() to first attempt acquiring a
>> per-vma lock. If the VMA is found and the range is fully contained
>> within it, the page table walk proceeds with the per-vma lock
>> instead of mmap_read_lock.
>>
>> This optimization is particularly effective for damon_va_young()
>> and damon_va_mkold(), which are frequently called and typically
>> operate within a single VMA.
>
> Makes sense. Do you have some measurements?
In fact, I do not have performance-related tests.
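If it would help, one possible way to quantify the effect (an untested
suggestion, assuming a perf build recent enough for BPF-based lock
contention tracking) is something like

	# measure mmap_lock contention while DAMON monitoring runs
	perf lock contention -a -b sleep 10

and comparing the reported mmap_lock wait times with and without this
patch.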
>
>>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>
> Looks good to me. Nonetheless, because I'm not familiar with per-vma locking,
> I'd like to wait for Sashiko review.
>
The Sashiko review reports an issue with inconsistent handling of VMAs
that have the VM_PFNMAP flag[1]. DAMON indeed has no need to walk
VM_PFNMAP VMAs, so a quick fix is as follows:
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index eed2ae132a58..d27147603564 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -251,8 +251,11 @@ static void damon_va_walk_page_range(struct mm_struct *mm, unsigned long start,
 		vma_end_read(vma);
 		goto lock_mmap;
 	}
 
-	ops->walk_lock = PGWALK_VMA_RDLOCK_VERIFY;
-	walk_page_range_vma(vma, start, end, ops, private);
+	if (!(vma->vm_flags & VM_PFNMAP)) {
+		ops->walk_lock = PGWALK_VMA_RDLOCK_VERIFY;
+		walk_page_range_vma(vma, start, end, ops, private);
+	}
+
 	vma_end_read(vma);
 	return;
Any more comments?
Thanks.
[1]
https://sashiko.dev/#/patchset/20260511132546.1973270-1-wangkefeng.wang@huawei.com?part=1
>
> Thanks,
> SJ
>
> [...]
>
* Re: [PATCH] mm/damon/vaddr: attempt per-vma lock during page table walk
2026-05-12 13:59 ` Kefeng Wang
@ 2026-05-12 14:08 ` SeongJae Park
0 siblings, 0 replies; 5+ messages in thread
From: SeongJae Park @ 2026-05-12 14:08 UTC (permalink / raw)
To: Kefeng Wang; +Cc: SeongJae Park, Andrew Morton, damon, linux-mm, sunnanyong
On Tue, 12 May 2026 21:59:26 +0800 Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
>
>
> On 5/12/2026 9:31 AM, SeongJae Park wrote:
> > On Mon, 11 May 2026 21:25:46 +0800 Kefeng Wang <wangkefeng.wang@huawei.com> wrote:
> >
> >> Currently, DAMON virtual address operations use mmap_read_lock
> >> during page table walks, which can cause unnecessary contention
> >> under high concurrency.
> >>
> >> Introduce damon_va_walk_page_range() to first attempt acquiring a
> >> per-vma lock. If the VMA is found and the range is fully contained
> >> within it, the page table walk proceeds with the per-vma lock
> >> instead of mmap_read_lock.
> >>
> >> This optimization is particularly effective for damon_va_young()
> >> and damon_va_mkold(), which are frequently called and typically
> >> operate within a single VMA.
> >
> > Makes sense. Do you have some measurements?
>
> In fact, I do not have performance-related tests.
>
> >
> >>
> >> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> >
> > Looks good to me. Nonetheless, because I'm not familiar with per-vma locking,
> > I'd like to wait for Sashiko review.
> >
>
> The Sashiko review reports an issue with inconsistent handling of VMAs
> that have the VM_PFNMAP flag[1].
For other readers who may want to read the Sashiko review and might want to
comment, it is also available [1] at lore.
> DAMON indeed has no need to walk
> VM_PFNMAP VMAs, so a quick fix is as follows:
>
> diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
> index eed2ae132a58..d27147603564 100644
> --- a/mm/damon/vaddr.c
> +++ b/mm/damon/vaddr.c
> @@ -251,8 +251,11 @@ static void damon_va_walk_page_range(struct mm_struct *mm, unsigned long start,
>  		vma_end_read(vma);
>  		goto lock_mmap;
>  	}
> 
> -	ops->walk_lock = PGWALK_VMA_RDLOCK_VERIFY;
> -	walk_page_range_vma(vma, start, end, ops, private);
> +	if (!(vma->vm_flags & VM_PFNMAP)) {
> +		ops->walk_lock = PGWALK_VMA_RDLOCK_VERIFY;
> +		walk_page_range_vma(vma, start, end, ops, private);
> +	}
> +
>  	vma_end_read(vma);
>  	return;
>
> Any more comments?
Looks good to me, thank you! Could you send v2 with this change?
>
> Thanks.
>
> [1]
> https://sashiko.dev/#/patchset/20260511132546.1973270-1-wangkefeng.wang@huawei.com?part=1
[1] https://lore.kernel.org/20260512052628.83798C2BCC7@smtp.kernel.org
Thanks,
SJ
[...]