* [RESEND RFC PATCH v2 1/5] mm/damon: add target_order field for DAMOS_COLLAPSE
2026-07-02 9:52 [RESEND RFC PATCH v2 0/5] mm/damon: add mTHP collapse and split actions Lian Wang
@ 2026-07-02 9:52 ` Lian Wang
2026-07-02 10:02 ` sashiko-bot
2026-07-02 9:52 ` [RESEND RFC PATCH v2 2/5] mm/khugepaged: add damon_collapse_folio_range() for external callers Lian Wang
` (4 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Lian Wang @ 2026-07-02 9:52 UTC (permalink / raw)
To: damon, linux-mm
Cc: linux-kernel, sj, gutierrez.asier, daichaobing, lianux.wang,
lianux.mm, kunwu.chan
DAMOS_COLLAPSE currently collapses into PMD-size THP only. Add a
target_order field to express per-order mTHP collapse intent. Zero
means system default (PMD order, same as current behavior). Valid
values are 0 and 2..HPAGE_PMD_ORDER.
Wire up the sysfs interface: a per-scheme rw file "target_order".
Validate at store time that the value is in range, and warn at scheme
creation time if DAMOS_COLLAPSE is used with an unsupported non-PMD
order, resetting to 0.
The actual mTHP application via the khugepaged wrapper will be added
in subsequent patches.
Co-developed-by: Kunwu Chan <kunwu.chan@gmail.com>
Signed-off-by: Kunwu Chan <kunwu.chan@gmail.com>
Signed-off-by: Lian Wang <lianux.mm@gmail.com>
Signed-off-by: Lian Wang <lianux.wang@processmission.com>
---
include/linux/damon.h | 5 ++++
mm/damon/core.c | 2 ++
mm/damon/sysfs-schemes.c | 61 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 68 insertions(+)
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 6f7edb3590ef..5a0587556573 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -572,6 +572,11 @@ struct damos_migrate_dests {
struct damos {
struct damos_access_pattern pattern;
enum damos_action action;
+ /*
+ * @target_order: target order for mTHP actions (DAMOS_COLLAPSE).
+ * 0 means system default (PMD order). Valid: 0, 2..HPAGE_PMD_ORDER.
+ */
+ unsigned int target_order;
unsigned long apply_interval_us;
/* private: internal use only */
/*
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 265d51ade25b..be54defd4646 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -579,6 +579,7 @@ struct damos *damon_new_scheme(struct damos_access_pattern *pattern,
scheme->migrate_dests = (struct damos_migrate_dests){};
scheme->target_nid = target_nid;
+ scheme->target_order = 0;
return scheme;
}
@@ -1278,6 +1279,7 @@ static int damos_commit(struct damos *dst, struct damos *src)
dst->wmarks = src->wmarks;
dst->target_nid = src->target_nid;
+ dst->target_order = src->target_order;
err = damos_commit_dests(&dst->migrate_dests, &src->migrate_dests);
if (err)
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 329cfd0bbe9f..7dcd582ded86 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -6,7 +6,9 @@
*/
#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/numa.h>
+#include <linux/huge_mm.h>
#include "sysfs-common.h"
@@ -2257,6 +2259,7 @@ struct damon_sysfs_scheme {
struct damon_sysfs_stats *stats;
struct damon_sysfs_scheme_regions *tried_regions;
int target_nid;
+ unsigned int target_order;
struct damos_sysfs_dests *dests;
};
@@ -2323,6 +2326,7 @@ static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc(
scheme->action = action;
scheme->apply_interval_us = apply_interval_us;
scheme->target_nid = NUMA_NO_NODE;
+ scheme->target_order = 0;
return scheme;
}
@@ -2642,6 +2646,39 @@ static ssize_t target_nid_store(struct kobject *kobj,
return err ? err : count;
}
+static ssize_t target_order_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+
+ return sysfs_emit(buf, "%u\n", scheme->target_order);
+}
+
+static ssize_t target_order_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_scheme *scheme = container_of(kobj,
+ struct damon_sysfs_scheme, kobj);
+ unsigned int val;
+ int err;
+
+ err = kstrtouint(buf, 0, &val);
+ if (err)
+ return err;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (val != 0 && (val < 2 || val > HPAGE_PMD_ORDER))
+ return -EINVAL;
+#else
+ if (val != 0)
+ return -EINVAL;
+#endif
+
+ scheme->target_order = val;
+ return count;
+}
+
static void damon_sysfs_scheme_release(struct kobject *kobj)
{
kfree(container_of(kobj, struct damon_sysfs_scheme, kobj));
@@ -2656,10 +2693,14 @@ static struct kobj_attribute damon_sysfs_scheme_apply_interval_us_attr =
static struct kobj_attribute damon_sysfs_scheme_target_nid_attr =
__ATTR_RW_MODE(target_nid, 0600);
+static struct kobj_attribute damon_sysfs_scheme_target_order_attr =
+ __ATTR_RW_MODE(target_order, 0600);
+
static struct attribute *damon_sysfs_scheme_attrs[] = {
&damon_sysfs_scheme_action_attr.attr,
&damon_sysfs_scheme_apply_interval_us_attr.attr,
&damon_sysfs_scheme_target_nid_attr.attr,
+ &damon_sysfs_scheme_target_order_attr.attr,
NULL,
};
ATTRIBUTE_GROUPS(damon_sysfs_scheme);
@@ -2970,6 +3011,7 @@ static struct damos *damon_sysfs_mk_scheme(
struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
struct damos *scheme;
+ unsigned int target_order;
int err;
struct damos_access_pattern pattern = {
@@ -3005,6 +3047,25 @@ static struct damos *damon_sysfs_mk_scheme(
if (!scheme)
return NULL;
+ target_order = sysfs_scheme->target_order;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ if (sysfs_scheme->action == DAMOS_COLLAPSE &&
+ target_order != 0 &&
+ target_order != HPAGE_PMD_ORDER) {
+ pr_warn("DAMON collapse: target_order %u not supported, only PMD order (%u) is available. Use 0 or %u.\n",
+ target_order,
+ HPAGE_PMD_ORDER, HPAGE_PMD_ORDER);
+ target_order = 0;
+ }
+#else
+ if (sysfs_scheme->action == DAMOS_COLLAPSE && target_order != 0) {
+ pr_warn("DAMON collapse: target_order not supported without THP. Use 0.\n");
+ target_order = 0;
+ }
+#endif
+ scheme->target_order = target_order;
+
err = damos_sysfs_add_quota_score(sysfs_quotas->goals, &scheme->quota);
if (err) {
damon_destroy_scheme(scheme);
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [RESEND RFC PATCH v2 1/5] mm/damon: add target_order field for DAMOS_COLLAPSE
2026-07-02 9:52 ` [RESEND RFC PATCH v2 1/5] mm/damon: add target_order field for DAMOS_COLLAPSE Lian Wang
@ 2026-07-02 10:02 ` sashiko-bot
0 siblings, 0 replies; 15+ messages in thread
From: sashiko-bot @ 2026-07-02 10:02 UTC (permalink / raw)
To: Lian Wang; +Cc: damon
Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider:
- [Medium] Sysfs value for `target_order` can silently diverge from the active DAMON scheme's internal state.
- [Low] The `#else` block fallback for `target_order` when THP is disabled is unreachable.
--
commit 38547105610a7847b39b76172e675c32ee3ca4f5
Author: Lian Wang <lianux.mm@gmail.com>
mm/damon: add target_order field for DAMOS_COLLAPSE
Adds a target_order field to express per-order mTHP collapse intent for
DAMOS_COLLAPSE. Wires up the sysfs interface target_order to configure this
value and warns when an unsupported non-PMD order is used.
> diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
> index dbf2b0515d582..9067cb66632ed 100644
> --- a/mm/damon/sysfs-schemes.c
> +++ b/mm/damon/sysfs-schemes.c
[ ... ]
> @@ -2644,6 +2648,39 @@ static ssize_t target_nid_store(struct kobject *kobj,
> return err ? err : count;
> }
>
> +static ssize_t target_order_show(struct kobject *kobj,
> + struct kobj_attribute *attr, char *buf)
> +{
> + struct damon_sysfs_scheme *scheme = container_of(kobj,
> + struct damon_sysfs_scheme, kobj);
> +
> + return sysfs_emit(buf, "%u\n", scheme->target_order);
> +}
> +
> +static ssize_t target_order_store(struct kobject *kobj,
> + struct kobj_attribute *attr, const char *buf, size_t count)
> +{
> + struct damon_sysfs_scheme *scheme = container_of(kobj,
> + struct damon_sysfs_scheme, kobj);
> + unsigned int val;
> + int err;
> +
> + err = kstrtouint(buf, 0, &val);
> + if (err)
> + return err;
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> + if (val != 0 && (val < 2 || val > HPAGE_PMD_ORDER))
> + return -EINVAL;
> +#else
> + if (val != 0)
> + return -EINVAL;
> +#endif
> +
> + scheme->target_order = val;
> + return count;
> +}
[ ... ]
> @@ -3010,6 +3052,25 @@ static struct damos *damon_sysfs_mk_scheme(
> if (!scheme)
> return NULL;
>
> + target_order = sysfs_scheme->target_order;
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> + if (sysfs_scheme->action == DAMOS_COLLAPSE &&
> + target_order != 0 &&
> + target_order != HPAGE_PMD_ORDER) {
> + pr_warn("DAMON collapse: target_order %u not supported, only PMD order (%u) is available. Use 0 or %u.\n",
> + target_order,
> + HPAGE_PMD_ORDER, HPAGE_PMD_ORDER);
> + target_order = 0;
[Severity: Medium]
Will this cause the sysfs API to report an incorrect state to userspace?
When an unsupported order is encountered, this overrides target_order to 0 for
the internal scheme, but it fails to update sysfs_scheme->target_order or
return an error. Userspace reading from target_order_show() will see the
divergent uncorrected value and believe a specific target order is active
when it has silently been overridden to the default.
> + }
> +#else
> + if (sysfs_scheme->action == DAMOS_COLLAPSE && target_order != 0) {
> + pr_warn("DAMON collapse: target_order not supported without THP. Use 0.\n");
> + target_order = 0;
> + }
[Severity: Low]
Is this block reachable?
In target_order_store(), if CONFIG_TRANSPARENT_HUGEPAGE is not set, any
non-zero value unconditionally returns -EINVAL. It seems target_order will
always be 0 when evaluating this block.
> +#endif
> + scheme->target_order = target_order;
> +
> err = damos_sysfs_add_quota_score(sysfs_quotas->goals, &scheme->quota);
> if (err) {
> damon_destroy_scheme(scheme);
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260702095227.75866-1-lianux.mm@gmail.com?part=1
^ permalink raw reply [flat|nested] 15+ messages in thread
* [RESEND RFC PATCH v2 2/5] mm/khugepaged: add damon_collapse_folio_range() for external callers
2026-07-02 9:52 [RESEND RFC PATCH v2 0/5] mm/damon: add mTHP collapse and split actions Lian Wang
2026-07-02 9:52 ` [RESEND RFC PATCH v2 1/5] mm/damon: add target_order field for DAMOS_COLLAPSE Lian Wang
@ 2026-07-02 9:52 ` Lian Wang
2026-07-02 10:08 ` sashiko-bot
2026-07-02 9:52 ` [RESEND RFC PATCH v2 3/5] mm/damon/vaddr: implement mTHP-aware DAMOS_COLLAPSE handler Lian Wang
` (3 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Lian Wang @ 2026-07-02 9:52 UTC (permalink / raw)
To: damon, linux-mm
Cc: linux-kernel, sj, gutierrez.asier, daichaobing, lianux.wang,
lianux.mm, kunwu.chan
Export a thin wrapper around collapse_huge_page() that allows external
subsystems such as DAMON to trigger THP collapse on a target address
range.
Currently restricted to PMD order (HPAGE_PMD_ORDER), since
collapse_huge_page() does not yet support arbitrary mTHP orders.
The restriction can be relaxed when khugepaged gains mTHP support.
The caller must hold a reference to @mm and must not hold the mmap lock:
collapse_huge_page() acquires mmap_read_lock for validation, releases
it, then acquires mmap_write_lock for the actual collapse. Holding
an outer mmap_read_lock would cause a self-deadlock when the same
thread attempts the inner mmap_write_lock.
Co-developed-by: Kunwu Chan <kunwu.chan@gmail.com>
Signed-off-by: Kunwu Chan <kunwu.chan@gmail.com>
Signed-off-by: Lian Wang <lianux.mm@gmail.com>
Signed-off-by: Lian Wang <lianux.wang@processmission.com>
---
include/linux/khugepaged.h | 9 ++++++++
mm/khugepaged.c | 46 ++++++++++++++++++++++++++++++++++++++
2 files changed, 55 insertions(+)
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index d7a9053ff4fe..f7d49cba712f 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -20,6 +20,9 @@ extern bool current_is_khugepaged(void);
void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
bool install_pmd);
+int damon_collapse_folio_range(struct mm_struct *mm, unsigned long start_addr,
+ unsigned int target_order);
+
static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
if (mm_flags_test(MMF_VM_HUGEPAGE, oldmm))
@@ -47,6 +50,12 @@ static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
{
}
+static inline int damon_collapse_folio_range(struct mm_struct *mm,
+ unsigned long start_addr, unsigned int target_order)
+{
+ return -EINVAL;
+}
+
static inline void khugepaged_min_free_kbytes_update(void)
{
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 617bca76db49..7fe9ce1e0533 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -3272,3 +3272,49 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
return thps == ((hend - hstart) >> HPAGE_PMD_SHIFT) ? 0
: madvise_collapse_errno(last_fail);
}
+
+/**
+ * damon_collapse_folio_range() - Collapse base pages in range into a THP
+ * @mm: mm_struct of the target process
+ * @start_addr: start address (must be order-aligned)
+ * @target_order: page order of the collapse result (currently only
+ * HPAGE_PMD_ORDER is supported)
+ *
+ * Thin wrapper around collapse_huge_page() for external callers such as
+ * DAMON. The caller must hold a reference to @mm. Do not hold mmap
+ * lock: collapse_huge_page() acquires mmap_read_lock for validation,
+ * releases it, then acquires mmap_write_lock for the collapse. Holding
+ * an outer mmap_read_lock would self-deadlock.
+ *
+ * Return: 0 on success, -EINVAL on bad arguments, negative error from
+ * madvise_collapse_errno() otherwise.
+ */
+int damon_collapse_folio_range(struct mm_struct *mm, unsigned long start_addr,
+ unsigned int target_order)
+{
+ struct collapse_control *cc;
+ enum scan_result result;
+
+ if (target_order != HPAGE_PMD_ORDER) {
+ pr_warn_once("%s: only PMD order (%u) is supported, got %u\n",
+ __func__, HPAGE_PMD_ORDER, target_order);
+ return -EINVAL;
+ }
+ if (start_addr & ((PAGE_SIZE << target_order) - 1))
+ return -EINVAL;
+
+ cc = kmalloc_obj(*cc);
+ if (!cc)
+ return -ENOMEM;
+ cc->is_khugepaged = false;
+ cc->progress = 0;
+
+ lru_add_drain_all();
+
+ result = collapse_huge_page(mm, start_addr, 1, 0, cc, target_order);
+ kfree(cc);
+ if (result == SCAN_SUCCEED || result == SCAN_PMD_MAPPED)
+ return 0;
+ return madvise_collapse_errno(result);
+}
+EXPORT_SYMBOL_GPL(damon_collapse_folio_range);
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [RESEND RFC PATCH v2 2/5] mm/khugepaged: add damon_collapse_folio_range() for external callers
2026-07-02 9:52 ` [RESEND RFC PATCH v2 2/5] mm/khugepaged: add damon_collapse_folio_range() for external callers Lian Wang
@ 2026-07-02 10:08 ` sashiko-bot
0 siblings, 0 replies; 15+ messages in thread
From: sashiko-bot @ 2026-07-02 10:08 UTC (permalink / raw)
To: Lian Wang; +Cc: damon
Thank you for your contribution! Sashiko AI review found 4 potential issue(s) to consider:
- [High] `damon_collapse_folio_range()` passes an uninitialized `struct collapse_control` to `collapse_huge_page()`, leaving fields like `node_load` and `alloc_nmask` with heap garbage, potentially corrupting NUMA node targeting during THP allocation.
- [High] Bypassing the read-only page table validation scan forces destructive PMD operations on un-collapsible memory ranges, creating a severe performance degradation vector.
- [Medium] `damon_collapse_folio_range()` silently fails to collapse file-backed memory ranges because it routes directly to the anonymous-only `collapse_huge_page()` function.
- [Medium] `damon_collapse_folio_range()` hardcodes the `unmapped` parameter to 0, which prevents the collapse operation from succeeding if there are any swapped-out pages in the PMD range.
--
commit e521d85e0499be054d0c31614f15b39332af3c13
Author: Lian Wang <lianux.mm@gmail.com>
mm/khugepaged: add damon_collapse_folio_range() for external callers
This commit introduces damon_collapse_folio_range(), providing a wrapper
around khugepaged's collapse_huge_page() for external subsystems like DAMON.
It allows targeted THP collapse on specific address ranges but is currently
limited to PMD order.
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 617bca76db49b..7fe9ce1e05335 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
[ ... ]
> @@ -3272,3 +3272,49 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
> return thps == ((hend - hstart) >> HPAGE_PMD_SHIFT) ? 0
> : madvise_collapse_errno(last_fail);
> }
> +
> +/**
> + * damon_collapse_folio_range() - Collapse base pages in range into a THP
> + * @mm: mm_struct of the target process
> + * @start_addr: start address (must be order-aligned)
> + * @target_order: page order of the collapse result (currently only
> + * HPAGE_PMD_ORDER is supported)
> + *
> + * Thin wrapper around collapse_huge_page() for external callers such as
> + * DAMON. The caller must hold a reference to @mm. Do not hold mmap
> + * lock: collapse_huge_page() acquires mmap_read_lock for validation,
> + * releases it, then acquires mmap_write_lock for the collapse. Holding
> + * an outer mmap_read_lock would self-deadlock.
> + *
> + * Return: 0 on success, -EINVAL on bad arguments, negative error from
> + * madvise_collapse_errno() otherwise.
> + */
> +int damon_collapse_folio_range(struct mm_struct *mm, unsigned long start_addr,
> + unsigned int target_order)
> +{
> + struct collapse_control *cc;
> + enum scan_result result;
> +
> + if (target_order != HPAGE_PMD_ORDER) {
> + pr_warn_once("%s: only PMD order (%u) is supported, got %u\n",
> + __func__, HPAGE_PMD_ORDER, target_order);
> + return -EINVAL;
> + }
> + if (start_addr & ((PAGE_SIZE << target_order) - 1))
> + return -EINVAL;
> +
> + cc = kmalloc_obj(*cc);
[Severity: High]
Does this leave the collapse_control struct uninitialized?
When collapse_huge_page() eventually calls alloc_charge_folio(), it uses
collapse_find_target_node(). This function iterates over cc->node_load and
reads/writes cc->alloc_nmask:
collapse_find_target_node()
...
node_set(nid, cc->alloc_nmask);
Since kmalloc_obj() does not zero memory, wouldn't this pass heap garbage
to the folio allocator and corrupt NUMA node targeting?
> + if (!cc)
> + return -ENOMEM;
> + cc->is_khugepaged = false;
> + cc->progress = 0;
> +
> + lru_add_drain_all();
> +
> + result = collapse_huge_page(mm, start_addr, 1, 0, cc, target_order);
[Severity: Medium]
Does this silently fail for file-backed memory regions?
By calling collapse_huge_page() directly instead of the dispatcher
collapse_single_pmd(), the flow reaches hugepage_vma_revalidate() with
expect_anon hardcoded to true:
hugepage_vma_revalidate()
if (expect_anon && !vma_is_anonymous(vma))
return SCAN_PAGE_ANON;
Wouldn't this unconditionally reject shmem/tmpfs mappings targeted by DAMON?
[Severity: Medium]
By passing 0 for the unmapped parameter here, does this prevent the collapse
operation if any pages are swapped out?
Because unmapped is 0, collapse_huge_page() skips
__collapse_huge_page_swapin(). Then, when __collapse_huge_page_isolate()
scans the PTEs, it will encounter swapped pages and fail:
__collapse_huge_page_isolate()
if (!pte_present(pteval)) {
result = SCAN_PTE_NON_PRESENT;
goto out;
}
Could we allow swap-in by passing 1 here instead?
[Severity: High]
Does bypassing the read-only scan phase cause severe performance issues?
Normally, collapse_scan_pmd() performs a read-only validation of the page
tables. By skipping straight to collapse_huge_page(), this code takes the
mmap_write_lock, destructively clears the PMD, and triggers a global TLB
flush before verifying if the PTEs can actually be isolated:
collapse_huge_page()
_pmd = pmdp_collapse_flush(vma, pmd_addr, pmd);
...
tlb_remove_table_sync_one();
If isolation fails (e.g., due to pinned or unmapped pages), it restores the
PMD and aborts. Because DAMON repeatedly scans and targets memory, wouldn't
this create continuous lock contention and TLB flush storms on
un-collapsible memory ranges?
> + kfree(cc);
> + if (result == SCAN_SUCCEED || result == SCAN_PMD_MAPPED)
> + return 0;
> + return madvise_collapse_errno(result);
> +}
> +EXPORT_SYMBOL_GPL(damon_collapse_folio_range);
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260702095227.75866-1-lianux.mm@gmail.com?part=2
^ permalink raw reply [flat|nested] 15+ messages in thread
* [RESEND RFC PATCH v2 3/5] mm/damon/vaddr: implement mTHP-aware DAMOS_COLLAPSE handler
2026-07-02 9:52 [RESEND RFC PATCH v2 0/5] mm/damon: add mTHP collapse and split actions Lian Wang
2026-07-02 9:52 ` [RESEND RFC PATCH v2 1/5] mm/damon: add target_order field for DAMOS_COLLAPSE Lian Wang
2026-07-02 9:52 ` [RESEND RFC PATCH v2 2/5] mm/khugepaged: add damon_collapse_folio_range() for external callers Lian Wang
@ 2026-07-02 9:52 ` Lian Wang
2026-07-02 10:27 ` sashiko-bot
2026-07-02 9:52 ` [RESEND RFC PATCH v2 4/5] mm/damon: introduce DAMOS_SPLIT action Lian Wang
` (2 subsequent siblings)
5 siblings, 1 reply; 15+ messages in thread
From: Lian Wang @ 2026-07-02 9:52 UTC (permalink / raw)
To: damon, linux-mm
Cc: linux-kernel, sj, gutierrez.asier, daichaobing, lianux.wang,
lianux.mm, kunwu.chan
When target_order is set (non-zero), the DAMOS_COLLAPSE handler now calls
damon_collapse_folio_range() to collapse pages into the requested mTHP
size, iterating over the target region in order-aligned chunks. When
target_order is 0 (default), the existing madvise(MADV_COLLAPSE) path is
used, preserving backwards compatibility.
Region boundaries are expanded outward to the covering aligned range
(ALIGN_DOWN start, ALIGN end) so that collapse works even after
kdamond_split_regions reduces region sizes below the chunk size.
collapse_huge_page() internally validates VMA bounds, so expanding
beyond the original region is safe.
No external mmap lock is held: collapse_huge_page() acquires
mmap_read_lock internally for validation, releases it, then acquires
mmap_write_lock for the actual collapse. Holding an outer
mmap_read_lock would cause a self-deadlock when the same thread
attempts the inner mmap_write_lock.
Co-developed-by: Kunwu Chan <kunwu.chan@gmail.com>
Signed-off-by: Kunwu Chan <kunwu.chan@gmail.com>
Signed-off-by: Lian Wang <lianux.mm@gmail.com>
Signed-off-by: Lian Wang <lianux.wang@processmission.com>
---
mm/damon/vaddr.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index d27147603564..98a87609376b 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -14,6 +14,7 @@
#include <linux/page_idle.h>
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
+#include <linux/khugepaged.h>
#include "../internal.h"
#include "ops-common.h"
@@ -899,6 +900,50 @@ static unsigned long damos_va_stat(struct damon_target *target,
return 0;
}
+static unsigned long damos_va_collapse(struct damon_target *target,
+ struct damon_region *r, struct damos *s,
+ unsigned long *sz_filter_passed)
+{
+ unsigned long addr, end, chunk_sz;
+ unsigned long last_chunk = ULONG_MAX;
+ unsigned int target_order = s->target_order;
+ unsigned long applied = 0;
+ struct mm_struct *mm;
+ int ret;
+
+ if (target_order < 2 || target_order > HPAGE_PMD_ORDER)
+ return 0;
+
+ chunk_sz = PAGE_SIZE << target_order;
+ addr = ALIGN_DOWN(r->ar.start, chunk_sz);
+ end = ALIGN(r->ar.end, chunk_sz);
+ if (end < addr)
+ return 0;
+
+ mm = damon_get_mm(target);
+ if (!mm)
+ return 0;
+
+ while (addr < end) {
+ if (addr + chunk_sz < addr)
+ break;
+ if (addr == last_chunk)
+ goto next;
+ last_chunk = addr;
+
+ ret = damon_collapse_folio_range(mm, addr, target_order);
+ if (!ret)
+ applied += chunk_sz;
+ *sz_filter_passed += chunk_sz;
+next:
+ addr += chunk_sz;
+ cond_resched();
+ }
+
+ mmput(mm);
+ return applied;
+}
+
static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
struct damon_target *t, struct damon_region *r,
struct damos *scheme, unsigned long *sz_filter_passed)
@@ -922,6 +967,9 @@ static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
madv_action = MADV_NOHUGEPAGE;
break;
case DAMOS_COLLAPSE:
+ if (scheme->target_order)
+ return damos_va_collapse(t, r, scheme,
+ sz_filter_passed);
madv_action = MADV_COLLAPSE;
break;
case DAMOS_MIGRATE_HOT:
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [RESEND RFC PATCH v2 3/5] mm/damon/vaddr: implement mTHP-aware DAMOS_COLLAPSE handler
2026-07-02 9:52 ` [RESEND RFC PATCH v2 3/5] mm/damon/vaddr: implement mTHP-aware DAMOS_COLLAPSE handler Lian Wang
@ 2026-07-02 10:27 ` sashiko-bot
0 siblings, 0 replies; 15+ messages in thread
From: sashiko-bot @ 2026-07-02 10:27 UTC (permalink / raw)
To: Lian Wang; +Cc: damon
Thank you for your contribution! Sashiko AI review found 6 potential issue(s) to consider:
- [Critical] The `collapse_control` struct `cc` is allocated without zeroing, causing uninitialized memory usage.
- [Critical] Calling `lru_add_drain_all()` and allocating huge pages per-chunk in a tight loop without VMA validation causes system-wide IPI storms and severe CPU stalls.
- [High] damon_collapse_folio_range explicitly rejects non-PMD orders, breaking the advertised mTHP support.
- [High] DAMOS_COLLAPSE via `damos_va_collapse` silently fails on all file-backed VMAs.
- [Medium] Local `last_chunk` variable fails to deduplicate cross-region chunks, artificially inflating DAMON statistics.
- [Low] Build break when CONFIG_TRANSPARENT_HUGEPAGE is disabled due to unconditional use of HPAGE_PMD_ORDER.
--
commit 1758a6da0e4869c672603d9b92add29e2cab16d6
Author: Lian Wang <lianux.mm@gmail.com>
mm/damon/vaddr: implement mTHP-aware DAMOS_COLLAPSE handler
The commit implements an mTHP-aware DAMOS_COLLAPSE handler that iterates over
the target region in order-aligned chunks when a target mTHP order is provided.
It utilizes damon_collapse_folio_range() for the underlying memory collapse
operations while falling back to the madvise path for default configurations.
> diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
> index 2058db9c01d52..8bfc6dcc635d5 100644
> --- a/mm/damon/vaddr.c
> +++ b/mm/damon/vaddr.c
> @@ -895,6 +896,50 @@ static unsigned long damos_va_stat(struct damon_target *target,
> return 0;
> }
>
> +static unsigned long damos_va_collapse(struct damon_target *target,
> + struct damon_region *r, struct damos *s,
> + unsigned long *sz_filter_passed)
> +{
> + unsigned long addr, end, chunk_sz;
> + unsigned long last_chunk = ULONG_MAX;
> + unsigned int target_order = s->target_order;
> + unsigned long applied = 0;
> + struct mm_struct *mm;
> + int ret;
> +
> + if (target_order < 2 || target_order > HPAGE_PMD_ORDER)
[Severity: Low]
Is this safe to use when CONFIG_TRANSPARENT_HUGEPAGE is disabled?
In damos_va_collapse(), HPAGE_PMD_ORDER is unconditionally referenced. Without
THP, HPAGE_PMD_ORDER evaluates to an expression containing BUILD_BUG(), which
can cause a build breakage.
> + return 0;
> +
> + chunk_sz = PAGE_SIZE << target_order;
> + addr = ALIGN_DOWN(r->ar.start, chunk_sz);
> + end = ALIGN(r->ar.end, chunk_sz);
> + if (end < addr)
> + return 0;
> +
> + mm = damon_get_mm(target);
> + if (!mm)
> + return 0;
> +
> + while (addr < end) {
> + if (addr + chunk_sz < addr)
> + break;
> + if (addr == last_chunk)
> + goto next;
> + last_chunk = addr;
[Severity: Medium]
Will this reliably deduplicate chunks across multiple DAMON regions?
Because last_chunk is local to damos_va_collapse() and resets for each region,
adjacent or overlapping regions that map to the same chunk might bypass this
check. This can result in the same range being processed multiple times,
artificially inflating the sz_filter_passed and applied statistics.
> +
> + ret = damon_collapse_folio_range(mm, addr, target_order);
[Severity: High]
Will this function correctly handle mTHP orders smaller than PMD?
While damos_va_collapse() allows target_order values down to 2,
damon_collapse_folio_range() in mm/khugepaged.c strictly rejects non-PMD
orders:
if (target_order != HPAGE_PMD_ORDER) {
...
return -EINVAL;
}
Doesn't this unconditionally reject smaller mTHP orders, causing the collapse
to fail while still incorrectly incrementing the sz_filter_passed statistic?
[Severity: Critical]
Can this code cause uninitialized memory usage during the collapse?
When damon_collapse_folio_range() allocates the collapse_control struct:
cc = kmalloc_obj(*cc);
if (!cc)
return -ENOMEM;
cc->is_khugepaged = false;
cc->progress = 0;
The cc->node_load array and cc->alloc_nmask are left uninitialized because
kmalloc_obj() does not zero memory. Since this bypasses collapse_scan_pmd()
which normally initializes these fields, collapse_find_target_node() will
later read garbage data to make NUMA node placement decisions.
[Severity: Critical]
Does this code handle unmapped virtual address ranges efficiently?
Iterating over purely virtual address ranges without validating if they belong
to a VMA can be problematic. For each chunk, damon_collapse_folio_range()
in mm/khugepaged.c executes heavy operations like lru_add_drain_all() and
attempts huge page allocations.
If a large unmapped memory hole is encountered, could this lead to severe
CPU stalls and system-wide IPI storms before the VMA validation eventually
fails?
[Severity: High]
Does this silently fail on file-backed VMAs?
Looking at damon_collapse_folio_range() in mm/khugepaged.c, it directly wraps
collapse_huge_page() which uses hugepage_vma_revalidate() with expect_anon
set to true. This unconditionally rejects file-backed VMAs without falling
back to collapse_file(), potentially breaking the collapse functionality for
file-backed memory.
> + if (!ret)
> + applied += chunk_sz;
> + *sz_filter_passed += chunk_sz;
> +next:
> + addr += chunk_sz;
> + cond_resched();
> + }
> +
> + mmput(mm);
> + return applied;
> +}
[ ... ]
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260702095227.75866-1-lianux.mm@gmail.com?part=3
^ permalink raw reply [flat|nested] 15+ messages in thread
* [RESEND RFC PATCH v2 4/5] mm/damon: introduce DAMOS_SPLIT action
2026-07-02 9:52 [RESEND RFC PATCH v2 0/5] mm/damon: add mTHP collapse and split actions Lian Wang
` (2 preceding siblings ...)
2026-07-02 9:52 ` [RESEND RFC PATCH v2 3/5] mm/damon/vaddr: implement mTHP-aware DAMOS_COLLAPSE handler Lian Wang
@ 2026-07-02 9:52 ` Lian Wang
2026-07-02 10:21 ` sashiko-bot
2026-07-02 9:52 ` [RESEND RFC PATCH v2 5/5] mm/damon/vaddr: implement DAMOS_SPLIT handler Lian Wang
2026-07-02 16:39 ` [RESEND RFC PATCH v2 0/5] mm/damon: add mTHP collapse and split actions SJ Park
5 siblings, 1 reply; 15+ messages in thread
From: Lian Wang @ 2026-07-02 9:52 UTC (permalink / raw)
To: damon, linux-mm
Cc: linux-kernel, sj, gutierrez.asier, daichaobing, lianux.wang,
lianux.mm, kunwu.chan
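Add DAMOS_SPLIT to the damos_action enum for splitting large folios
into smaller mTHP-order folios. Reuse the existing target_order field of
struct damos (introduced for DAMOS_COLLAPSE) to specify the desired split
order, and update the field's documentation to cover both actions.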
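Expose the action as "split" through the DAMON sysfs interface. For
DAMOS_SPLIT the expected target_order range is 2..HPAGE_PMD_ORDER-1; a
value of 0 or HPAGE_PMD_ORDER is not rejected but triggers a warning at
scheme creation time and falls back to 2.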
Signed-off-by: Lian Wang <lianux.mm@gmail.com>
Signed-off-by: Lian Wang <lianux.wang@processmission.com>
---
include/linux/damon.h | 9 +++++++--
mm/damon/sysfs-schemes.c | 16 ++++++++++++++++
2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 5a0587556573..30cf4afb212c 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -121,6 +121,7 @@ struct damon_target {
* @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE.
* @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
* @DAMOS_COLLAPSE: Call ``madvise()`` for the region with MADV_COLLAPSE.
+ * @DAMOS_SPLIT: Split large folios to the target mTHP order.
* @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists.
* @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists.
* @DAMOS_MIGRATE_HOT: Migrate the regions prioritizing warmer regions.
@@ -141,6 +142,7 @@ enum damos_action {
DAMOS_HUGEPAGE,
DAMOS_NOHUGEPAGE,
DAMOS_COLLAPSE,
+ DAMOS_SPLIT,
DAMOS_LRU_PRIO,
DAMOS_LRU_DEPRIO,
DAMOS_MIGRATE_HOT,
@@ -573,8 +575,11 @@ struct damos {
struct damos_access_pattern pattern;
enum damos_action action;
/*
- * @target_order: target order for mTHP actions (DAMOS_COLLAPSE).
- * 0 means system default (PMD order). Valid: 0, 2..HPAGE_PMD_ORDER.
+ * @target_order: target mTHP order for DAMOS_COLLAPSE and
+ * DAMOS_SPLIT. For COLLAPSE, 0 means PMD order default,
+ * valid values: 0, 2..HPAGE_PMD_ORDER. For SPLIT,
+ * valid values: 2..HPAGE_PMD_ORDER-1; 0 and HPAGE_PMD_ORDER
+ * are rejected at scheme creation time (defaulting to 2).
*/
unsigned int target_order;
unsigned long apply_interval_us;
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 7dcd582ded86..84a4617ca3d3 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -2293,6 +2293,10 @@ static struct damos_sysfs_action_name damos_sysfs_action_names[] = {
.action = DAMOS_COLLAPSE,
.name = "collapse",
},
+ {
+ .action = DAMOS_SPLIT,
+ .name = "split",
+ },
{
.action = DAMOS_LRU_PRIO,
.name = "lru_prio",
@@ -3058,11 +3062,23 @@ static struct damos *damon_sysfs_mk_scheme(
HPAGE_PMD_ORDER, HPAGE_PMD_ORDER);
target_order = 0;
}
+ if (sysfs_scheme->action == DAMOS_SPLIT &&
+ (target_order == 0 ||
+ target_order >= HPAGE_PMD_ORDER)) {
+ pr_warn("DAMON split: target_order %u invalid, need 2..%u. Defaulting to 2.\n",
+ target_order,
+ HPAGE_PMD_ORDER - 1);
+ target_order = 2;
+ }
#else
if (sysfs_scheme->action == DAMOS_COLLAPSE && target_order != 0) {
pr_warn("DAMON collapse: target_order not supported without THP. Use 0.\n");
target_order = 0;
}
+ if (sysfs_scheme->action == DAMOS_SPLIT) {
+ pr_warn("DAMON split: not supported without THP.\n");
+ target_order = 2;
+ }
#endif
scheme->target_order = target_order;
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [RESEND RFC PATCH v2 4/5] mm/damon: introduce DAMOS_SPLIT action
2026-07-02 9:52 ` [RESEND RFC PATCH v2 4/5] mm/damon: introduce DAMOS_SPLIT action Lian Wang
@ 2026-07-02 10:21 ` sashiko-bot
0 siblings, 0 replies; 15+ messages in thread
From: sashiko-bot @ 2026-07-02 10:21 UTC (permalink / raw)
To: Lian Wang; +Cc: damon
Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
- [Medium] Sysfs state inconsistency when an invalid target_order is supplied for DAMOS_SPLIT
--
commit 2142d0d071963cdc56d472e03ea29ec38eeda286
Author: Lian Wang <lianux.mm@gmail.com>
mm/damon: introduce DAMOS_SPLIT action
This commit adds the DAMOS_SPLIT action for splitting large folios into
smaller mTHP-order folios. It exposes this action through the DAMON sysfs
interface and validates the target_order to be within the allowed bounds.
[ ... ]
> diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
> index 9067cb66632ed..198b64ca22598 100644
> --- a/mm/damon/sysfs-schemes.c
> +++ b/mm/damon/sysfs-schemes.c
[ ... ]
> @@ -3063,11 +3067,23 @@ static struct damos *damon_sysfs_mk_scheme(
> HPAGE_PMD_ORDER, HPAGE_PMD_ORDER);
> target_order = 0;
> }
> + if (sysfs_scheme->action == DAMOS_SPLIT &&
> + (target_order == 0 ||
> + target_order >= HPAGE_PMD_ORDER)) {
> + pr_warn("DAMON split: target_order %u invalid, need 2..%u. Defaulting to 2.\n",
> + target_order,
> + HPAGE_PMD_ORDER - 1);
> + target_order = 2;
> + }
[Severity: Medium]
Does this create a state inconsistency between the user-facing sysfs
interface and the actual running kernel configuration?
When an invalid target_order is supplied, target_order is overridden to a
safe default for the internal scheme here, but the sysfs_scheme->target_order
field is left unmodified. As a result, the sysfs read interface will
continue to report the incorrect value (such as 0) instead of the actual
value (2) being used by the active scheme.
> #else
> if (sysfs_scheme->action == DAMOS_COLLAPSE && target_order != 0) {
> pr_warn("DAMON collapse: target_order not supported without THP. Use 0.\n");
> target_order = 0;
> }
> + if (sysfs_scheme->action == DAMOS_SPLIT) {
> + pr_warn("DAMON split: not supported without THP.\n");
> + target_order = 2;
> + }
> #endif
> scheme->target_order = target_order;
>
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260702095227.75866-1-lianux.mm@gmail.com?part=4
^ permalink raw reply [flat|nested] 15+ messages in thread
* [RESEND RFC PATCH v2 5/5] mm/damon/vaddr: implement DAMOS_SPLIT handler
2026-07-02 9:52 [RESEND RFC PATCH v2 0/5] mm/damon: add mTHP collapse and split actions Lian Wang
` (3 preceding siblings ...)
2026-07-02 9:52 ` [RESEND RFC PATCH v2 4/5] mm/damon: introduce DAMOS_SPLIT action Lian Wang
@ 2026-07-02 9:52 ` Lian Wang
2026-07-02 10:30 ` sashiko-bot
2026-07-02 16:39 ` [RESEND RFC PATCH v2 0/5] mm/damon: add mTHP collapse and split actions SJ Park
5 siblings, 1 reply; 15+ messages in thread
From: Lian Wang @ 2026-07-02 9:52 UTC (permalink / raw)
To: damon, linux-mm
Cc: linux-kernel, sj, gutierrez.asier, daichaobing, lianux.wang,
lianux.mm, kunwu.chan
Implement the vaddr operations layer handler for DAMOS_SPLIT.
For each folio in the target region that is larger than the
scheme's target_order, split it via split_folio_to_order().
This supports both anonymous and file-backed (e.g. tmpfs/shmem)
folios, covering KVM guest memory backed by THP tmpfs.
Signed-off-by: Lian Wang <lianux.mm@gmail.com>
Signed-off-by: Lian Wang <lianux.wang@processmission.com>
---
mm/damon/vaddr.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 80 insertions(+)
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 98a87609376b..a177703f7e0a 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -944,6 +944,83 @@ static unsigned long damos_va_collapse(struct damon_target *target,
return applied;
}
+static unsigned long damos_va_split(struct damon_target *target,
+ struct damon_region *r, struct damos *s,
+ unsigned long *sz_filter_passed)
+{
+ unsigned long addr, end, chunk_sz;
+ unsigned int target_order = s->target_order;
+ unsigned long applied = 0;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ struct folio *folio;
+ struct folio_walk fw;
+
+ mm = damon_get_mm(target);
+ if (!mm)
+ return 0;
+
+ chunk_sz = PAGE_SIZE << HPAGE_PMD_ORDER;
+ addr = ALIGN_DOWN(r->ar.start, chunk_sz);
+ end = ALIGN(r->ar.end, chunk_sz);
+ if (end < addr)
+ goto out_mmput;
+
+ while (addr < end) {
+ unsigned long folio_sz;
+
+ if (addr + chunk_sz < addr)
+ break;
+
+ mmap_read_lock(mm);
+ vma = find_vma(mm, addr);
+
+ if (!vma || addr < vma->vm_start ||
+ vma->vm_flags & (VM_HUGETLB | VM_MIXEDMAP))
+ goto unlock;
+
+ folio = folio_walk_start(&fw, vma, addr, 0);
+ if (!folio)
+ goto unlock;
+
+ folio_sz = PAGE_SIZE << folio_order(folio);
+
+ if (folio_order(folio) > target_order) {
+ if (!folio_trylock(folio)) {
+ folio_walk_end(&fw, vma);
+ goto unlock;
+ }
+ folio_get(folio);
+ folio_walk_end(&fw, vma);
+
+ if (!split_folio_to_order(folio, target_order))
+ applied += folio_sz;
+
+ folio_unlock(folio);
+ folio_put(folio);
+ *sz_filter_passed += folio_sz;
+ addr += folio_sz;
+ } else {
+ folio_walk_end(&fw, vma);
+ *sz_filter_passed += chunk_sz;
+ addr += chunk_sz;
+ }
+ mmap_read_unlock(mm);
+ cond_resched();
+ continue;
+
+unlock:
+ *sz_filter_passed += chunk_sz;
+ addr += chunk_sz;
+ mmap_read_unlock(mm);
+ cond_resched();
+ }
+
+out_mmput:
+ mmput(mm);
+ return applied;
+}
+
static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
struct damon_target *t, struct damon_region *r,
struct damos *scheme, unsigned long *sz_filter_passed)
@@ -977,6 +1054,9 @@ static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
return damos_va_migrate(t, r, scheme, sz_filter_passed);
case DAMOS_STAT:
return damos_va_stat(t, r, scheme, sz_filter_passed);
+ case DAMOS_SPLIT:
+ return damos_va_split(t, r, scheme,
+ sz_filter_passed);
default:
/*
* DAMOS actions that are not yet supported by 'vaddr'.
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [RESEND RFC PATCH v2 5/5] mm/damon/vaddr: implement DAMOS_SPLIT handler
2026-07-02 9:52 ` [RESEND RFC PATCH v2 5/5] mm/damon/vaddr: implement DAMOS_SPLIT handler Lian Wang
@ 2026-07-02 10:30 ` sashiko-bot
0 siblings, 0 replies; 15+ messages in thread
From: sashiko-bot @ 2026-07-02 10:30 UTC (permalink / raw)
To: Lian Wang; +Cc: damon
Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider:
- [High] Incorrect address range expansion and iteration logic in `damos_va_split()` causes out-of-bounds folio processing and skips memory.
- [High] `damos_va_split()` calls `split_folio_to_order()` on mapped compound pages without verifying they are valid LRU folios, leading to kernel crashes.
--
commit 1aa5abadd8058bacb6959d3c7864cfd3e927e6df
Author: Lian Wang <lianux.mm@gmail.com>
mm/damon/vaddr: implement DAMOS_SPLIT handler
This commit implements the vaddr operations layer handler for DAMOS_SPLIT.
It iterates over the target region and attempts to split any large folios
that exceed the scheme's target order.
> diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
> index 8bfc6dcc635d5..73cb61c655fc7 100644
> --- a/mm/damon/vaddr.c
> +++ b/mm/damon/vaddr.c
> @@ -940,6 +940,83 @@ static unsigned long damos_va_collapse(struct damon_target *target,
> return applied;
> }
>
> +static unsigned long damos_va_split(struct damon_target *target,
> + struct damon_region *r, struct damos *s,
> + unsigned long *sz_filter_passed)
> +{
> + unsigned long addr, end, chunk_sz;
> + unsigned int target_order = s->target_order;
> + unsigned long applied = 0;
> + struct mm_struct *mm;
> + struct vm_area_struct *vma;
> + struct folio *folio;
> + struct folio_walk fw;
> +
> + mm = damon_get_mm(target);
> + if (!mm)
> + return 0;
> +
> + chunk_sz = PAGE_SIZE << HPAGE_PMD_ORDER;
> + addr = ALIGN_DOWN(r->ar.start, chunk_sz);
> + end = ALIGN(r->ar.end, chunk_sz);
[Severity: High]
Does aligning the start and end to 2MB boundaries risk processing memory outside
the requested target region? It seems this could expand the iteration beyond
the boundaries defined by r->ar.start and r->ar.end.
> + if (end < addr)
> + goto out_mmput;
> +
> + while (addr < end) {
> + unsigned long folio_sz;
> +
> + if (addr + chunk_sz < addr)
> + break;
> +
> + mmap_read_lock(mm);
> + vma = find_vma(mm, addr);
> +
> + if (!vma || addr < vma->vm_start ||
> + vma->vm_flags & (VM_HUGETLB | VM_MIXEDMAP))
> + goto unlock;
> +
> + folio = folio_walk_start(&fw, vma, addr, 0);
> + if (!folio)
> + goto unlock;
> +
> + folio_sz = PAGE_SIZE << folio_order(folio);
> +
> + if (folio_order(folio) > target_order) {
> + if (!folio_trylock(folio)) {
> + folio_walk_end(&fw, vma);
> + goto unlock;
> + }
> + folio_get(folio);
> + folio_walk_end(&fw, vma);
> +
> + if (!split_folio_to_order(folio, target_order))
[Severity: High]
Could this call to split_folio_to_order() encounter driver-allocated compound
pages?
While VM_MIXEDMAP and VM_HUGETLB are filtered out, other normal VMAs
might still be backed by compound pages that are not fully initialized LRU
folios. Without checking if the folio is actually a valid LRU folio (e.g., via
folio_test_lru()), this might dereference uninitialized fields during the split
and cause kernel crashes.
> + applied += folio_sz;
> +
> + folio_unlock(folio);
> + folio_put(folio);
> + *sz_filter_passed += folio_sz;
> + addr += folio_sz;
> + } else {
> + folio_walk_end(&fw, vma);
> + *sz_filter_passed += chunk_sz;
> + addr += chunk_sz;
[Severity: High]
When the folio order is smaller than or equal to the target order, does advancing
addr by a full 2MB chunk miss the remaining folios within that chunk?
If addr was advanced by folio_sz in a previous loop iteration, adding chunk_sz
here might overshoot and skip memory.
> + }
> + mmap_read_unlock(mm);
> + cond_resched();
> + continue;
> +
> +unlock:
> + *sz_filter_passed += chunk_sz;
> + addr += chunk_sz;
[Severity: High]
Similarly, does advancing by chunk_sz on error paths (like missing VMAs, holes,
or lock failures) skip intermediate memory and artificially inflate the
sz_filter_passed statistic?
> + mmap_read_unlock(mm);
> + cond_resched();
> + }
> +
> +out_mmput:
> + mmput(mm);
> + return applied;
> +}
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260702095227.75866-1-lianux.mm@gmail.com?part=5
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [RESEND RFC PATCH v2 0/5] mm/damon: add mTHP collapse and split actions
2026-07-02 9:52 [RESEND RFC PATCH v2 0/5] mm/damon: add mTHP collapse and split actions Lian Wang
` (4 preceding siblings ...)
2026-07-02 9:52 ` [RESEND RFC PATCH v2 5/5] mm/damon/vaddr: implement DAMOS_SPLIT handler Lian Wang
@ 2026-07-02 16:39 ` SJ Park
5 siblings, 0 replies; 15+ messages in thread
From: SJ Park @ 2026-07-02 16:39 UTC (permalink / raw)
To: Lian Wang
Cc: SJ Park, damon, linux-mm, linux-kernel, gutierrez.asier,
daichaobing, lianux.wang, kunwu.chan
On Thu, 2 Jul 2026 17:52:22 +0800 Lian Wang <lianux.mm@gmail.com> wrote:
> Resend of v2 with the RFC tag restored (v1 was RFC PATCH, so v2 should
> be RFC PATCH v2).
Somehow you sent this twice. Maybe an issue with your email setup? You also
replied with the same message twice to my previous comment. Anyway, I will
review the later-posted one: https://lore.kernel.org/20260702094633.75658-1-lianux.mm@gmail.com
Thanks,
SJ
[...]
^ permalink raw reply [flat|nested] 15+ messages in thread