Linux-mm Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH v1.2 11/11] mm/damon/sysfs: fix typos in probe_{add,rm}_dirs: s/attr/probe/
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Andrew Morton, damon, linux-kernel, linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

damon_sysfs_probe_{add,rm}_dirs names a variable for damon_sysfs_probe
as 'attr'.  Probably a trivial copy-pasta error, but it makes the code
not pleasant to read.  Fix those.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 mm/damon/sysfs.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index f3bb146b204df..36d71f1675426 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1068,7 +1068,7 @@ static struct damon_sysfs_probe *damon_sysfs_probe_alloc(void)
 	return kzalloc_obj(struct damon_sysfs_probe);
 }
 
-static int damon_sysfs_probe_add_dirs(struct damon_sysfs_probe *attr)
+static int damon_sysfs_probe_add_dirs(struct damon_sysfs_probe *probe)
 {
 	struct damon_sysfs_filters *filters;
 	int err;
@@ -1076,22 +1076,22 @@ static int damon_sysfs_probe_add_dirs(struct damon_sysfs_probe *attr)
 	filters = damon_sysfs_filters_alloc();
 	if (!filters)
 		return -ENOMEM;
-	attr->filters = filters;
+	probe->filters = filters;
 
 	err = kobject_init_and_add(&filters->kobj, &damon_sysfs_filters_ktype,
-			&attr->kobj, "filters");
+			&probe->kobj, "filters");
 	if (err) {
 		kobject_put(&filters->kobj);
-		attr->filters = NULL;
+		probe->filters = NULL;
 	}
 	return err;
 }
 
-static void damon_sysfs_probe_rm_dirs(struct damon_sysfs_probe *attr)
+static void damon_sysfs_probe_rm_dirs(struct damon_sysfs_probe *probe)
 {
-	if (attr->filters) {
-		damon_sysfs_filters_rm_dirs(attr->filters);
-		kobject_put(&attr->filters->kobj);
+	if (probe->filters) {
+		damon_sysfs_filters_rm_dirs(probe->filters);
+		kobject_put(&probe->filters->kobj);
 	}
 }
 
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 10/11] mm/damon/sysfs: split out filters setup function
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Andrew Morton, damon, linux-kernel, linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

damon_sysfs_set_probe() is doing not only probe setup but also filters
setup.  Split out filters setup for readability.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 mm/damon/sysfs.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 982d824f63c21..f3bb146b204df 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1899,16 +1899,11 @@ static int damon_sysfs_set_attrs(struct damon_ctx *ctx,
 	return damon_set_attrs(ctx, &attrs);
 }
 
-static int damon_sysfs_set_probe(struct damon_probe *probe,
-		struct damon_sysfs_probe *sys_probe)
+static int damon_sysfs_set_filters(struct damon_probe *probe,
+		struct damon_sysfs_filters *sys_filters)
 {
-	struct damon_sysfs_filters *sys_filters;
 	int i;
 
-	sys_filters = sys_probe->filters;
-	if (!sys_filters)
-		return 0;
-
 	for (i = 0; i < sys_filters->nr; i++) {
 		struct damon_sysfs_filter *sys_filter =
 			sys_filters->filters_arr[i];
@@ -1935,6 +1930,17 @@ static int damon_sysfs_set_probe(struct damon_probe *probe,
 	return 0;
 }
 
+static int damon_sysfs_set_probe(struct damon_probe *probe,
+		struct damon_sysfs_probe *sys_probe)
+{
+	struct damon_sysfs_filters *sys_filters;
+
+	sys_filters = sys_probe->filters;
+	if (!sys_filters)
+		return 0;
+	return damon_sysfs_set_filters(probe, sys_filters);
+}
+
 static int damon_sysfs_set_probes(struct damon_ctx *ctx,
 		struct damon_sysfs_probes *sys_probes)
 {
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 09/11] mm/damon/sysfs: split probe setup function out
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Andrew Morton, damon, linux-kernel, linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

damon_sysfs_set_probes() function is relatively long.  It has two nested
loops for setting two nested entities, namely probe and filter.  Split
out the probe level setup for readability.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 mm/damon/sysfs.c | 80 ++++++++++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 34 deletions(-)

diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 2e95e3bac774d..982d824f63c21 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1899,47 +1899,59 @@ static int damon_sysfs_set_attrs(struct damon_ctx *ctx,
 	return damon_set_attrs(ctx, &attrs);
 }
 
-static int damon_sysfs_set_probes(struct damon_ctx *ctx,
-		struct damon_sysfs_probes *sys_probes)
+static int damon_sysfs_set_probe(struct damon_probe *probe,
+		struct damon_sysfs_probe *sys_probe)
 {
+	struct damon_sysfs_filters *sys_filters;
 	int i;
 
-	for (i = 0; i < sys_probes->nr; i++) {
-		struct damon_sysfs_filters *sys_filters =
-			sys_probes->probes_arr[i]->filters;
-		struct damon_probe *c;
-		int j;
+	sys_filters = sys_probe->filters;
+	if (!sys_filters)
+		return 0;
 
-		if (!sys_filters)
-			continue;
-		c = damon_new_probe();
-		if (!c)
+	for (i = 0; i < sys_filters->nr; i++) {
+		struct damon_sysfs_filter *sys_filter =
+			sys_filters->filters_arr[i];
+		struct damon_filter *filter;
+
+		filter = damon_new_filter(sys_filter->type,
+				sys_filter->matching,
+				sys_filter->allow);
+		if (!filter)
 			return -ENOMEM;
-		damon_add_probe(ctx, c);
-
-		for (j = 0; j < sys_filters->nr; j++) {
-			struct damon_sysfs_filter *sys_filter =
-				sys_filters->filters_arr[j];
-			struct damon_filter *filter;
-
-			filter = damon_new_filter(sys_filter->type,
-					sys_filter->matching,
-					sys_filter->allow);
-			if (!filter)
-				return -ENOMEM;
-			if (filter->type == DAMON_FILTER_TYPE_MEMCG) {
-				int err;
-
-				err = damon_sysfs_memcg_path_to_id(
-						sys_filter->path,
-						&filter->memcg_id);
-				if (err) {
-					damon_destroy_filter(filter);
-					return err;
-				}
+		if (filter->type == DAMON_FILTER_TYPE_MEMCG) {
+			int err;
+
+			err = damon_sysfs_memcg_path_to_id(
+					sys_filter->path,
+					&filter->memcg_id);
+			if (err) {
+				damon_destroy_filter(filter);
+				return err;
 			}
-			damon_add_filter(c, filter);
 		}
+		damon_add_filter(probe, filter);
+	}
+	return 0;
+}
+
+static int damon_sysfs_set_probes(struct damon_ctx *ctx,
+		struct damon_sysfs_probes *sys_probes)
+{
+	int i, err;
+
+	for (i = 0; i < sys_probes->nr; i++) {
+		struct damon_sysfs_probe *sys_probe;
+		struct damon_probe *p;
+
+		p = damon_new_probe();
+		if (!p)
+			return -ENOMEM;
+		damon_add_probe(ctx, p);
+		sys_probe = sys_probes->probes_arr[i];
+		err = damon_sysfs_set_probe(p, sys_probe);
+		if (err)
+			return err;
 	}
 	return 0;
 }
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 08/11] mm/damon/core: reduce range setup in damon_commit_target_regions()
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Andrew Morton, damon, linux-kernel, linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

damon_commit_target_regions() calls damon_set_regions() for updating the
destination target's monitoring target region boundaries.  It sets the
boundaries same to source target's monitoring regions, even if they are
adjacent.  Meanwhile, damon_set_regions() sets the destination target
regions exactly the same to the source, only when the target regions are
empty.  When there are existing target regions, only a few regions are
expanded or shrunk to fit on only the boundaries for disjoint regions in
the source.  Hence the adjacent source ranges mean nothing in common
cases.  When there are many regions, such adjacent range setup is only a
waste of time and space.  We recently found [1] it is actually causing
memory overhead.  Setup the ranges for only distinct ranges.

[1] https://lore.kernel.org/20260603112306.58490-1-akinobu.mita@gmail.com

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 mm/damon/core.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 7e4b9affc5b06..ce5294cb1b4f3 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -1349,21 +1349,33 @@ static struct damon_target *damon_nth_target(int n, struct damon_ctx *ctx)
 static int damon_commit_target_regions(struct damon_target *dst,
 		struct damon_target *src, unsigned long src_min_region_sz)
 {
-	struct damon_region *src_region;
+	struct damon_region *src_region, *prev = NULL;
 	struct damon_addr_range *ranges;
 	int i = 0, err;
 
-	damon_for_each_region(src_region, src)
-		i++;
+	damon_for_each_region(src_region, src) {
+		if (!prev || prev->ar.end != src_region->ar.start)
+			i++;
+		prev = src_region;
+	}
 	if (!i)
 		return 0;
 
 	ranges = kmalloc_objs(*ranges, i, GFP_KERNEL | __GFP_NOWARN);
 	if (!ranges)
 		return -ENOMEM;
+	prev = NULL;
 	i = 0;
-	damon_for_each_region(src_region, src)
-		ranges[i++] = src_region->ar;
+	damon_for_each_region(src_region, src) {
+		if (!prev) {
+			ranges[i].start = src_region->ar.start;
+		} else if (prev->ar.end != src_region->ar.start) {
+			ranges[i++].end = prev->ar.end;
+			ranges[i].start = src_region->ar.start;
+		}
+		prev = src_region;
+	}
+	ranges[i++].end = damon_last_region(src)->ar.end;
 	err = damon_set_regions(dst, ranges, i, src_min_region_sz);
 	kfree(ranges);
 	return err;
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 07/11] selftests/damon/sysfs.sh: test all files in quota goal dir
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Shuah Khan, damon, linux-kernel, linux-kselftest,
	linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

DAMON sysfs interface for DAMOS quota has been extended quite a bit since
its initial introduction.  The test case for that in the DAMON sysfs
interface essential file operations test (sysfs.sh) has not been extended
accordingly, though.  Extend the test case to test all existing files.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 tools/testing/selftests/damon/sysfs.sh | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index b88bf7b98d7f7..811173cb89449 100755
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -199,6 +199,20 @@ test_goal()
 	ensure_dir "$goal_dir" "exist"
 	ensure_file "$goal_dir/target_value" "exist" "600"
 	ensure_file "$goal_dir/current_value" "exist" "600"
+	ensure_file "$goal_dir/target_metric" "exist" "600"
+	local fpath="$goal_dir/target_metric"
+	ensure_write_succ "$fpath" "user_input" "valid input"
+	ensure_write_succ "$fpath" "some_mem_psi_us" "valid input"
+	ensure_write_succ "$fpath" "node_mem_used_bp" "valid input"
+	ensure_write_succ "$fpath" "node_mem_free_bp" "valid input"
+	ensure_write_succ "$fpath" "node_memcg_used_bp" "valid input"
+	ensure_write_succ "$fpath" "node_memcg_free_bp" "valid input"
+	ensure_write_succ "$fpath" "active_mem_bp" "valid input"
+	ensure_write_succ "$fpath" "inactive_mem_bp" "valid input"
+	ensure_write_succ "$fpath" "node_eligible_mem_bp" "valid input"
+	ensure_write_fail "$fpath" "foo" "invalid input"
+	ensure_file "$goal_dir/nid" "exist" "600"
+	ensure_file "$goal_dir/path" "exist" "600"
 }
 
 test_goals()
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 06/11] selftests/damon/sysfs.sh: test dests dir
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Shuah Khan, damon, linux-kernel, linux-kselftest,
	linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

DAMON sysfs interface essential file operations test (sysfs.sh) is
not testing DAMOS dests/ directory.  Add the test.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 tools/testing/selftests/damon/sysfs.sh | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index 07a33995be852..b88bf7b98d7f7 100755
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -99,6 +99,29 @@ test_stats()
 	done
 }
 
+test_dest()
+{
+	dest_dir=$1
+	ensure_file "$dest_dir/id" "exist" "600"
+	ensure_file "$dest_dir/weight" "exist" "600"
+}
+
+test_dests()
+{
+	dests_dir=$1
+	ensure_file "$dests_dir/nr_dests" "exist" "600"
+	ensure_write_succ "$dests_dir/nr_dests" "1" "valid input"
+	test_dest "$dests_dir/0"
+
+	ensure_write_succ "$dests_dir/nr_dests" "2" "valid input"
+	test_dest "$dests_dir/0"
+	test_dest "$dests_dir/1"
+
+	ensure_write_succ "$dests_dir/nr_dests" "0" "valid input"
+	ensure_dir "$dests_dir/0" "not_exist"
+	ensure_dir "$dests_dir/1" "not_exist"
+}
+
 test_filter()
 {
 	filter_dir=$1
@@ -225,6 +248,7 @@ test_scheme()
 	ensure_file "$scheme_dir/apply_interval_us" "exist" "600"
 	test_quotas "$scheme_dir/quotas"
 	test_watermarks "$scheme_dir/watermarks"
+	test_dests "$scheme_dir/dests"
 	test_filters "$scheme_dir/filters"
 	test_filters "$scheme_dir/core_filters"
 	test_filters "$scheme_dir/ops_filters"
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 05/11] selftests/damon/sysfs.sh: test {core,ops}_filters/ directories
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Shuah Khan, damon, linux-kernel, linux-kselftest,
	linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

DAMON sysfs interface essential file operations test (sysfs.sh) is not
testing DAMOS {core,ops}_filters directories.  Add the tests.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 tools/testing/selftests/damon/sysfs.sh | 28 ++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index 0f2ef462a6b6a..07a33995be852 100755
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -103,10 +103,28 @@ test_filter()
 {
 	filter_dir=$1
 	ensure_file "$filter_dir/type" "exist" "600"
-	ensure_write_succ "$filter_dir/type" "anon" "valid input"
-	ensure_write_succ "$filter_dir/type" "memcg" "valid input"
-	ensure_write_succ "$filter_dir/type" "addr" "valid input"
-	ensure_write_succ "$filter_dir/type" "target" "valid input"
+
+	local dir_name=$(basename "$(dirname "$filter_dir")")
+	if  [ "$dir_name" = "filters" ] || [ "$dir_name" = "ops_filters" ]
+	then
+		ensure_write_succ "$filter_dir/type" "anon" "valid input"
+		ensure_write_succ "$filter_dir/type" "memcg" "valid input"
+	fi
+	if  [ "$dir_name" = "filters" ] || [ "$dir_name" = "core_filters" ]
+	then
+		ensure_write_succ "$filter_dir/type" "addr" "valid input"
+		ensure_write_succ "$filter_dir/type" "target" "valid input"
+	fi
+	if [ "$dir_name" = "core_filters" ]
+	then
+		ensure_write_fail "$filter_dir/type" "anon" "ops type"
+		ensure_write_fail "$filter_dir/type" "memcg" "ops type"
+	fi
+	if [ "$dir_name"  = "ops_filters" ]
+	then
+		ensure_write_fail "$filter_dir/type" "addr" "core type"
+		ensure_write_fail "$filter_dir/type" "target" "core type"
+	fi
 	ensure_write_fail "$filter_dir/type" "foo" "invalid input"
 	ensure_file "$filter_dir/matching" "exist" "600"
 	ensure_file "$filter_dir/memcg_path" "exist" "600"
@@ -208,6 +226,8 @@ test_scheme()
 	test_quotas "$scheme_dir/quotas"
 	test_watermarks "$scheme_dir/watermarks"
 	test_filters "$scheme_dir/filters"
+	test_filters "$scheme_dir/core_filters"
+	test_filters "$scheme_dir/ops_filters"
 	test_stats "$scheme_dir/stats"
 	test_tried_regions "$scheme_dir/tried_regions"
 }
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 04/11] selftests/damon/sysfs.sh: test multiple probe dirs creation
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Shuah Khan, damon, linux-kernel, linux-kselftest,
	linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

DAMON sysfs essential file operations test (sysfs.sh) was extended to
test DAMON probes sysfs directory, by commit 14885da09b0f
("selftests/damon/sysfs.sh: test probes dir").  Unlike other DAMON sysfs
files, it is testing only a single directory case.  Extend it for
multiple directories.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 tools/testing/selftests/damon/sysfs.sh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index 78f4badb5bebb..0f2ef462a6b6a 100755
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -346,8 +346,13 @@ test_probes()
 	ensure_write_succ "$probes_dir/nr_probes" "1" "valid input"
 	test_probe "$probes_dir/0"
 
+	ensure_write_succ "$probes_dir/nr_probes" "2" "valid input"
+	test_probe "$probes_dir/0"
+	test_probe "$probes_dir/1"
+
 	ensure_write_succ "$probes_dir/nr_probes" "0" "valid input"
 	ensure_dir "$probes_dir/0" "not_exist"
+	ensure_dir "$probes_dir/1" "not_exist"
 }
 
 test_monitoring_attrs()
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 03/11] mm/damon/tests/core-kunit: test damon_rand()
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Andrew Morton, Brendan Higgins, David Gow, damon,
	kunit-dev, linux-kernel, linux-kselftest, linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

Commit 9012c4e647df ("mm/damon: replace damon_rand() with a per-ctx
lockless PRNG") optimized DAMON for better performance.  Add a kunit
test for ensuring the bounds of the output.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 mm/damon/tests/core-kunit.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/mm/damon/tests/core-kunit.h b/mm/damon/tests/core-kunit.h
index 1cfb8c176b873..eec7cb325a431 100644
--- a/mm/damon/tests/core-kunit.h
+++ b/mm/damon/tests/core-kunit.h
@@ -1460,6 +1460,22 @@ static void damon_test_is_last_region(struct kunit *test)
 	damon_free_target(t);
 }
 
+static void damon_test_rand(struct kunit *test)
+{
+	struct damon_ctx ctx;
+	int counts[10] = {};
+	int i;
+
+	prandom_seed_state(&ctx.rnd_state, get_random_u64());
+	for (i = 0; i < 10000; i++) {
+		unsigned long rnd = damon_rand(&ctx, 0, 10);
+
+		KUNIT_EXPECT_GE(test, rnd, 0);
+		KUNIT_EXPECT_LE(test, rnd, 9);
+		counts[rnd]++;
+	}
+}
+
 static struct kunit_case damon_test_cases[] = {
 	KUNIT_CASE(damon_test_target),
 	KUNIT_CASE(damon_test_regions),
@@ -1489,6 +1505,7 @@ static struct kunit_case damon_test_cases[] = {
 	KUNIT_CASE(damon_test_set_filters_default_reject),
 	KUNIT_CASE(damon_test_apply_min_nr_regions),
 	KUNIT_CASE(damon_test_is_last_region),
+	KUNIT_CASE(damon_test_rand),
 	{},
 };
 
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 02/11] Docs/ABI/damon: document probe files
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Liam R. Howlett, Andrew Morton, David Hildenbrand,
	Lorenzo Stoakes, Michal Hocko, Mike Rapoport, Suren Baghdasaryan,
	Vlastimil Babka, damon, linux-kernel, linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

DAMON ABI document is not updated for the DAMON probe sysfs files.
Update.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 .../ABI/testing/sysfs-kernel-mm-damon         | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon
index b73e6bc28ea5f..f914aab79fced 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-damon
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -157,6 +157,46 @@ Description:	Writing a value to this file sets the maximum number of
 		monitoring regions of the DAMON context as the value.  Reading
 		this file returns the value.
 
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/probes/nr_probes
+Date:		May 2026
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Writing a number 'N' to this file creates the number of
+		directories for each DAMON probe named '0' to 'N-1' under the
+		probes/ directory.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/probes/<P>/filters/nr_filters
+Date:		May 2026
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Writing a number 'N' to this file creates the number of
+		directories for each DAMON probe filter named '0' to 'N-1'
+		under the filters/ directory.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/probes/<P>/filters/<F>/type
+Date:		May 2026
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Writing to and reading from this file sets and gets the type of
+		the memory of the interest.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/probes/<P>/filters/<F>/path
+Date:		May 2026
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	If 'memcg' is written to the 'type' file, writing to and
+		reading from this file sets and gets the path to the memory
+		cgroup of the interest.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/probes/<P>/filters/<F>/matching
+Date:		May 2026
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Writing 'Y' or 'N' to this file sets whether the filter is for
+		the memory of the 'type', or all except the 'type'.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/probes/<P>/filters/<F>/allow
+Date:		May 2026
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Writing 'Y' or 'N' to this file sets whether to allow or reject
+		hitting the probe for the memory that satisfies the 'type' and
+		the 'matching' of the directory.
+
 What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/nr_targets
 Date:		Mar 2022
 Contact:	SeongJae Park <sj@kernel.org>
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 01/11] Docs/mm/damon/design: update for DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Liam R. Howlett, Andrew Morton, David Hildenbrand,
	Jonathan Corbet, Lorenzo Stoakes, Michal Hocko, Mike Rapoport,
	Shuah Khan, Suren Baghdasaryan, Vlastimil Babka, damon, linux-doc,
	linux-kernel, linux-mm
In-Reply-To: <20260625142357.103500-1-sj@kernel.org>

Commit 9138e27a3bc3 ("mm/damon: add node_eligible_mem_bp goal metric")
introduced DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP but forgot to update the
DAMON design document for that.  Update.

Signed-off-by: SeongJae Park <sj@kernel.org>
---
 Documentation/mm/damon/design.rst | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/mm/damon/design.rst b/Documentation/mm/damon/design.rst
index 2da7ca0d3d17a..9dbace087a329 100644
--- a/Documentation/mm/damon/design.rst
+++ b/Documentation/mm/damon/design.rst
@@ -686,9 +686,11 @@ mechanism tries to make ``current_value`` of ``target_metric`` be same to
   (1/10,000).
 - ``inactive_mem_bp``: Inactive to active + inactive (LRU) memory size ratio in
   bp (1/10,000).
+- ``node_eligible_mem_bp``: Scheme target access pattern-eligible memory ratio
+  of a node in bp (1/10,000).
 
-``nid`` is optionally required for only ``node_mem_used_bp``,
-``node_mem_free_bp``, ``node_memcg_used_bp`` and ``node_memcg_free_bp`` to
+``nid`` is optionally required for ``node_mem_used_bp``, ``node_mem_free_bp``,
+``node_memcg_used_bp``, ``node_memcg_free_bp`` and ``node_eligible_mem_bp`` to
 point the specific NUMA node.
 
 ``path`` is optionally required for only ``node_memcg_used_bp`` and
-- 
2.47.3


^ permalink raw reply related

* [RFC PATCH v1.2 00/11] mm/damon: update, optimize, and clean up doc, tests, and code
From: SeongJae Park @ 2026-06-25 14:23 UTC (permalink / raw)
  Cc: SeongJae Park, Liam R. Howlett, Andrew Morton, Brendan Higgins,
	David Gow, David Hildenbrand, Jonathan Corbet, Lorenzo Stoakes,
	Michal Hocko, Mike Rapoport, Shuah Khan, Shuah Khan,
	Suren Baghdasaryan, Vlastimil Babka, damon, kunit-dev, linux-doc,
	linux-kernel, linux-kselftest, linux-mm

Patches 1 and 2 update the design and ABI documents for recently added
DAMON features.  Patches 3-7 add or update more unit and self tests for
DAMON to cover recently changed or added functions and sysfs files.
Patch 8 optimizes damon_commit_target_regions() to skip unnecessary
adjacent ranges setup.  Patches 9-11 clean and fix up recently added
DAMON sysfs interface code for readability.

Changes from RFC v1.1
- RFC v1.1: https://lore.kernel.org/20260625050756.91115-1-sj@kernel.org
- Document nid requirement for node_eligible_mem_bp.
- Fix typos: s/memmcg/memcg/, s/geets/gets/.
- Drop damon_rand() randomness test case; test the output bounds only.
- Fixup dests dir selftest to do real test with correct file permission checks.
Changes from RFC
- RFC: https://lore.kernel.org/20260624142008.87180-1-sj@kernel.org
- Rebase directly to latest mm-new.

SeongJae Park (11):
  Docs/mm/damon/design: update for DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP
  Docs/ABI/damon: document probe files
  mm/damon/tests/core-kunit: test damon_rand()
  selftests/damon/sysfs.sh: test multiple probe dirs creation
  selftests/damon/sysfs.sh: test {core,ops}_filters/ directories
  selftests/damon/sysfs.sh: test dests dir
  selftests/damon/sysfs.sh: test all files in quota goal dir
  mm/damon/core: reduce range setup in damon_commit_target_regions()
  mm/damon/sysfs: split probe setup function out
  mm/damon/sysfs: split out filters setup function
  mm/damon/sysfs: fix typos in probe_{add,rm}_dirs: s/attr/probe/

 .../ABI/testing/sysfs-kernel-mm-damon         |  40 +++++++
 Documentation/mm/damon/design.rst             |   6 +-
 mm/damon/core.c                               |  22 +++-
 mm/damon/sysfs.c                              | 102 ++++++++++--------
 mm/damon/tests/core-kunit.h                   |  17 +++
 tools/testing/selftests/damon/sysfs.sh        |  71 +++++++++++-
 6 files changed, 205 insertions(+), 53 deletions(-)


base-commit: ada7832345164eed1bbca10543b0c46f13738215
-- 
2.47.3


^ permalink raw reply

* Re: [PATCH 6.18.y v4 0/9] mm: backport sticky VMA flags and soft-dirty fix
From: Lorenzo Stoakes @ 2026-06-25 14:16 UTC (permalink / raw)
  To: Ahmed Elaidy; +Cc: stable, linux-mm, akpm, avagin
In-Reply-To: <20260515124218.151966-2-elaidya225@gmail.com>

On Fri, May 15, 2026 at 03:42:10PM +0300, Ahmed Elaidy wrote:
> This series backports the sticky VMA flags infrastructure and the
> VM_SOFTDIRTY-on-merge fix to linux-6.18.y.

Thanks again for doing this Ahmed! :)

Cheers, Lorenzo

>
> Motivation: CRIU incremental dump/restore can hit a missing-parent-pagemap
> failure when VM_SOFTDIRTY is lost during VMA merge operations.
>
> Patch 8 is the target fix:
>   mm: propagate VM_SOFTDIRTY on merge
>
> The preceding patches provide required dependencies on 6.18.y and are included
> to preserve upstream behavior, as requested by maintainers for stable backports.
>
> Changes since v3:
>   - Reverted to sending the full 9-patch series as requested by Greg KH and Lorenzo.
>   - Updated Lorenzo's email to ljs@kernel.org across all patches.
>   - Added Cc: stable@vger.kernel.org # 6.18.x to all patches.
>   - Added Fixes tag for soft-dirty merging in Patch 8.
>
> Lorenzo Stoakes (9):
>   mm: introduce VM_MAYBE_GUARD and make visible in /proc/$pid/smaps
>   mm: add atomic VMA flags and set VM_MAYBE_GUARD as such
>   mm: update vma_modify_flags() to handle residual flags, document
>   mm: implement sticky VMA flags
>   mm: introduce copy-on-fork VMAs and make VM_MAYBE_GUARD one
>   mm: set the VM_MAYBE_GUARD flag on guard region install
>   tools/testing/vma: add VMA sticky userland tests
>   mm: propagate VM_SOFTDIRTY on merge
>   testing/selftests/mm: add soft-dirty merge self-test
>
>  Documentation/filesystems/proc.rst      |   5 +-
>  fs/proc/task_mmu.c                      |   1 +
>  include/linux/mm.h                      | 100 +++++++++++++++++
>  include/trace/events/mmflags.h          |   1 +
>  mm/khugepaged.c                         |  71 +++++++-----
>  mm/madvise.c                            |  24 +++--
>  mm/memory.c                             |  14 +--
>  mm/mlock.c                              |   2 +-
>  mm/mprotect.c                           |   2 +-
>  mm/mseal.c                              |   7 +-
>  mm/vma.c                                |  81 +++++++-------
>  mm/vma.h                                | 138 +++++++++++++++++-------
>  tools/testing/selftests/mm/soft-dirty.c | 127 +++++++++++++++++++++-
>  tools/testing/vma/vma.c                 |  92 ++++++++++++++--
>  tools/testing/vma/vma_internal.h        |  49 +++++++++
>  15 files changed, 579 insertions(+), 135 deletions(-)
>
> --
> 2.54.0
>


^ permalink raw reply

* Re: [PATCH] mm/page_vma_mapped: guard check_pmd() with CONFIG_TRANSPARENT_HUGEPAGE
From: Lorenzo Stoakes @ 2026-06-25 14:02 UTC (permalink / raw)
  To: David Hildenbrand (Arm)
  Cc: Wei Yang, akpm, riel, liam, vbabka, harry, jannh, willy, linux-mm,
	linux-kernel, lance.yang
In-Reply-To: <c66f4bec-0933-401b-bf2f-a1b2e256023f@kernel.org>

On Thu, Jun 25, 2026 at 03:49:59PM +0200, David Hildenbrand (Arm) wrote:
> On 6/25/26 15:45, Lorenzo Stoakes wrote:
> > On Wed, Jun 24, 2026 at 08:23:59AM +0000, Wei Yang wrote:
> >> The kernel test robot reported a build failure on the parisc architecture
> >> when expanding HPAGE_PMD_NR in check_pmd().
> >
> > Let me first say that I absolutely hate that we continue to support museum
> > piece architectures to the point that we have to make changes in core code
> > to accommodate them.
>
> I wonder why we shouldn't be able to trigger that on other archs with
> !CONFIG_TRANSPARENT_HUGEPAGE ?

I think this should just use CONFIG_PGTABLE_HAS_HUGE_LEAVES, since that's the
property that literally defines whether check_pmd() makes any sense.

>
> I think the code just relies on pmd_trans_huge() == false, and consequently
> check_pmd will get compiled out completely.
>
> Now, the report was against Wei's new patch.
>
> There is *nothing* to be fixed for existing code.

OK so it's a fix sent in the merge window, against a patch sent in the merge
window. Great.

I mean, let's all chill here. Sip some wine. Some brandy. Some absinthe. Perhaps
even some turpentine* for the connoisseurs!

Slow down a bit Wei!

You're sending a lot of fiddly series that require a lot of review and we're
extremely busy with review already.

Please just relax and maybe go water your garden a bit. Have a cornetto :)

>
>
> Fixes: 2aff7a4755be ("mm: Convert page_vma_mapped_walk to work on PFNs")
>
> is just wrong?

Yes therefore it is indeed.

Though it's really horrible that we relied on things getting compiled out like
that... nasty!

>
> --
> Cheers,
>
> David

Cheers, Lorenzo

*Obligatory safety notice for the overly literal: do not do this, this is a
 joke.


^ permalink raw reply

* Re: [PATCH 0/5] Fix incorrect access of hugetlb pte entries
From: Zi Yan @ 2026-06-25 13:59 UTC (permalink / raw)
  To: Dev Jain, muchun.song, osalvador, akpm, ljs, david, liam
  Cc: riel, vbabka, harry, jannh, lance.yang, kas, linux-mm,
	linux-kernel, rcampbell, apopple, matthew.brost, joshua.hahnjy,
	rakie.kim, byungchul, gourry, ying.huang, mel, nao.horiguchi, ak,
	j-nomura, pfalcato, dave.hansen, tglx, jpoimboe, ryan.roberts,
	anshuman.khandual
In-Reply-To: <20260625112955.3254283-1-dev.jain@arm.com>

On Thu Jun 25, 2026 at 7:29 AM EDT, Dev Jain wrote:
> There are various places which use ptep_get() to get the pte entry
> corresponding to a hugetlb folio. Some arches have special handling

I think it is better to mention s390 as a concrete example.

> to compute the pteval, so they provide huge_ptep_get(). Use this
> helper consistently.
>
> Dev Jain (5):
>   mm/rmap: use huge_ptep_get() in try_to_unmap_one()
>   mm/rmap: use huge_ptep_get() in try_to_migrate_one()
>   mm/migrate: use huge_ptep_get() in remove_migration_pte()
>   mm/page_vma_mapped: use huge_ptep_get() for hugetlb
>   mm/mprotect: use huge_ptep_get() for hugetlb
>
>  include/linux/hugetlb.h |  3 +++
>  mm/migrate.c            |  6 +++++-
>  mm/mprotect.c           |  8 +++++++-
>  mm/page_vma_mapped.c    |  8 +++++++-
>  mm/rmap.c               | 32 ++++++++++++++++++++------------
>  5 files changed, 42 insertions(+), 15 deletions(-)




-- 
Best Regards,
Yan, Zi



^ permalink raw reply

* Re: [PATCH] mm/page_vma_mapped: guard check_pmd() with CONFIG_TRANSPARENT_HUGEPAGE
From: Lorenzo Stoakes @ 2026-06-25 13:51 UTC (permalink / raw)
  To: Wei Yang
  Cc: Andrew Morton, david, riel, liam, vbabka, harry, jannh, willy,
	linux-mm, linux-kernel, lance.yang, balbirs, Roman Gushchin
In-Reply-To: <20260625064102.tcmvrctcqibl54yr@master>

+cc Roman for Sashiko discussion

On Thu, Jun 25, 2026 at 06:41:02AM +0000, Wei Yang wrote:
> +cc Balbir
>
> On Wed, Jun 24, 2026 at 09:59:43PM -0700, Andrew Morton wrote:
> >On Thu, 25 Jun 2026 03:46:29 +0000 Wei Yang <richard.weiyang@gmail.com> wrote:
> >
> >> >Sashiko had an off-topic complaint about the surrounding code:
> >> >	https://lore.kernel.org/oe-kbuild-all/202606240042.ffPsEXVc-lkp@intel.com/
> >>
> >> I see this robot reply, but not see the Sashiko comment.
> >>
> >> How can I view Sashiko's comment?
> >
> >oop sorry.
> >
> >You can go to https://sashiko.dev/ and search for the email subject.
> >
> >Or append your Message-ID to "https://sashiko.dev/#/patchset":
> >
> >	https://sashiko.dev/#/patchset/20260624082359.2869-1-richard.weiyang@gmail.com
> >
>
> Got it, thanks
>
> This one mentioned two things:
>
>   a. page_vma_mapped_walk() return without check
>   b. whether __split_huge_pmd_locked() would split device-private pmd
>
> For a., it is being fixed at [1].
>
> For b., to be honest I am not 100% for sure. If a device-private pmd could be
> file backed, then this looks like a bug.
>
> Balbir,
>
> Would you mind taking a look at the second comment raised by Sashiko?
>
> [1]: https://lore.kernel.org/linux-mm/20260624065353.1622-1-richard.weiyang@gmail.com/

I continue to dislike that sashiko does this.

Series with... interesting use of AI :).. are already taking up more of the time
we reviewers don't have... but interrupting existing review to mention random
stuff is unhelpful I feel :)

I think the better use of time here would be for Balbir to perhaps ask AI to
examine all cases where a PMD device private entry might crop up and to check to
see if there's any other bugs similar to the ones we've encountered before?

Given Sashiko is very token-constrained, I also wonder whether this feature
wouldn't be better disabled (or maybe have the ability to turn off
per-subsystem?)

In a couple other cases the 'also consider' stuff actually took a bunch of time
unnecessarily and I felt they interfered with the series landing.

Given the time constraints we all work under, it'd be better not to add to
workload this way (having to figure out if the points are valid are a time drain
in themselves).

Thanks, Lorenzo


^ permalink raw reply

* Re: [PATCH v5 5/9] mm/memory_hotplug: offline_and_remove_memory_ranges()
From: Gregory Price @ 2026-06-25 13:51 UTC (permalink / raw)
  To: David Hildenbrand (Arm)
  Cc: linux-mm, nvdimm, linux-kernel, linux-cxl, driver-core,
	linux-kselftest, kernel-team, osalvador, gregkh, rafael, dakr,
	djbw, vishal.l.verma, dave.jiang, akpm, ljs, liam, vbabka, rppt,
	surenb, mhocko, shuah, alison.schofield,
	Smita.KoralahalliChannabasappa, ira.weiny, apopple
In-Reply-To: <d48feca1-0203-43ff-bd66-6243291a51ba@kernel.org>

On Thu, Jun 25, 2026 at 09:22:01AM +0200, David Hildenbrand (Arm) wrote:
> On 6/24/26 16:57, Gregory Price wrote:
> >  extern int offline_and_remove_memory(u64 start, u64 size);
> > +int offline_and_remove_memory_ranges(const struct range *ranges, int nr_ranges);
> >  
> >  #else
> >  static inline void try_offline_node(int nid) {}
> > @@ -283,6 +284,12 @@ static inline int remove_memory(u64 start, u64 size)
> >  }
> >  
> >  static inline void __remove_memory(u64 start, u64 size) {}
> > +
> > +static inline int offline_and_remove_memory_ranges(const struct range *ranges,
> > +						   int nr_ranges)
> 
> Best to use "unsigned int" right from the start and use two tabs to indent.
> 

ack, ack.  need to reprogram my brain to two-indent style, i keep doing
this reflexively.

> > +int offline_and_remove_memory_ranges(const struct range *ranges, int nr_ranges)
> > +{
> > +	unsigned long mb_total = 0;
> >  	uint8_t *online_types, *tmp;
> > -	int rc;
> > +	int i, rc = 0;
> >  
> > -	if (!IS_ALIGNED(start, memory_block_size_bytes()) ||
> > -	    !IS_ALIGNED(size, memory_block_size_bytes()) || !size)
> > +	if (!ranges || nr_ranges <= 0)
> 
> With "unsigned int" this will be !nr_ranges.
> 
> Wondering whether we would WARN_ON_ONCE() here.
> 

Seems reasonable.  Do we normally WARN when callers send dumb arguments?
Seems like sending -EINVAL is sufficient?

> > -	online_types = kmalloc_array(mb_count, sizeof(*online_types),
> > +	online_types = kmalloc_array(mb_total, sizeof(*online_types),
> >  				     GFP_KERNEL);
> 
> Is "mb_total" really more expressive than "mb_count"?
> 

No, this was mostly my way of keeping track of what was being moved around
while working it.  I will change it back.

> >  	/*
> > -	 * In case we succeeded to offline all memory, remove it.
> > -	 * This cannot fail as it cannot get onlined in the meantime.
> > +	 * Phase 2: Remove each range. This essentially cannot fail as we hold
> > +	 * the hotplug lock . WARN if that assumption is ever broken.
> >  	 */
> >  	if (!rc) {
> > -		rc = try_remove_memory(start, size);
> > -		if (rc)
> > -			pr_err("%s: Failed to remove memory: %d", __func__, rc);
> > +		for (i = 0; i < nr_ranges; i++) {
> > +			rc = try_remove_memory(ranges[i].start,
> > +					       range_len(&ranges[i]));
> > +			if (WARN_ON_ONCE(rc)) {
> > +				pr_err("%s: Failed to remove memory: %d",
> > +				       __func__, rc);
> > +				break;
> 
> Do we really want to break? I'd say, just warn and continue, and fake rc == 0.
> Something is seriously messed up already, and we partially removed memory. There
> is no clean rollback possible.
> 
> Similar to __remove_memory(), ignoring the error because it offlined it already.
> 

This seems reasonable, will change to warn and continue + return error.

Sashiko actually pointed out that there's a corner condition here with
offline rollback, so i needed to tweak this chunk anyway.

~Gregory


^ permalink raw reply

* Re: [PATCH] mm/page_vma_mapped: guard check_pmd() with CONFIG_TRANSPARENT_HUGEPAGE
From: David Hildenbrand (Arm) @ 2026-06-25 13:49 UTC (permalink / raw)
  To: Lorenzo Stoakes, Wei Yang
  Cc: akpm, riel, liam, vbabka, harry, jannh, willy, linux-mm,
	linux-kernel, lance.yang
In-Reply-To: <aj0vjdBN-oNMI2yI@lucifer>

On 6/25/26 15:45, Lorenzo Stoakes wrote:
> On Wed, Jun 24, 2026 at 08:23:59AM +0000, Wei Yang wrote:
>> The kernel test robot reported a build failure on the parisc architecture
>> when expanding HPAGE_PMD_NR in check_pmd().
> 
> Let me first say that I absolutely hate that we continue to support museum
> piece architectures to the point that we have to make changes in core code
> to accommodate them.

I wonder why we shouldn't be able to trigger that on other archs with
!CONFIG_TRANSPARENT_HUGEPAGE ?

I think the code just relies on pmd_trans_huge() == false, and consequently
check_pmd will get compiled out completely.

Now, the report was against Wei's new patch.

There is *nothing* to be fixed for existing code.


Fixes: 2aff7a4755be ("mm: Convert page_vma_mapped_walk to work on PFNs")

is just wrong?

-- 
Cheers,

David


^ permalink raw reply

* Re: [RFC PATCH] mm: Avoiding split large folios if swap has no space
From: David Hildenbrand (Arm) @ 2026-06-25 13:45 UTC (permalink / raw)
  To: Johannes Weiner
  Cc: Barry Song, akpm, axelrasmussen, baolin.wang, dev.jain, kasong,
	lance.yang, liam, linux-kernel, linux-mm, ljs, npache, qi.zheng,
	ryan.roberts, shakeel.butt, weixugc, yuanchu, zhaonanzhe, ziy,
	Michal Hocko, Roman Gushchin
In-Reply-To: <aj0u12N_GzGtQT6K@cmpxchg.org>

On 6/25/26 15:36, Johannes Weiner wrote:
> On Thu, Jun 25, 2026 at 09:49:56AM +0200, David Hildenbrand (Arm) wrote:
>>>
>>> I don't quite understand you. get_nr_swap_pages() returns
>>> nr_swap_pages, which increases or decreases as swap is allocated or
>>> freed. I guess it just reflects how many swaps we currently have
>>> available?
>>
>> Indeed, I was confused by the function name it's "free swap pages". So all good :)
>>
>>>
>>>
>>> Yep. The tricky part is that mem_cgroup_try_charge_swap() cannot
>>> return how much swap quota is available in the memcg. Do you prefer to
>>> add an output argument to mem_cgroup_try_charge_swap() to expose
>>> that
>> That would probably be cleanest, if that is easily possible. We would want to
>> get memcg maintainer feedback on that.
>>
>> @memcg folks: we'd like to know whether splitting a large folio would make
>> mem_cgroup_try_charge_swap() succeed on a split (smaller) part, to distinguish
>> "there is no way we can swap out anything, don't split" vs. "we could swap out,
>> split".
> 
> It's technically doable, but is this worth the bother? The remaining
> headroom is less than a large folio. You can split this one, but you
> cannot even swap out all of its subpages anymore?

I was asking myself the same, but when we think in terms of THPs on arm64 64k
we're in the range of double-digit MiBs.

> From the cgroup
> side, we don't need the limit to be obeyed this rigidly. We overcharge
> temporarily in other places if it's convenient to do so. A fuzz factor
> around the limit is acceptable.

Thanks for that information.

> 
> But if you still want to do it, here is how:
> 
> The page_counter_try_charge() in __mem_cgroup_try_charge_swap() walks
> the hierarchy upwards. If it fails, it will store the first level that
> failed against its limit. You can do the mem_cgroup_margin() math
> against this counter to determine headroom. An ancestor *could* be
> more restrictive, so you need to finish the hierarchy walk to the root
> and use the min() of all the swap.max - page_counter_read(swap). Then
> return that in a return argument from __mem_cgroup_try_charge_swap().

Thanks! @Barry, up to you if we want to implement that right away or if we're
simply going to assume that if charging fails, not worth splitting (changing the
existing handling IIUC).

-- 
Cheers,

David


^ permalink raw reply

* Re: [PATCH] mm/page_vma_mapped: guard check_pmd() with CONFIG_TRANSPARENT_HUGEPAGE
From: Lorenzo Stoakes @ 2026-06-25 13:45 UTC (permalink / raw)
  To: Wei Yang
  Cc: akpm, david, riel, liam, vbabka, harry, jannh, willy, linux-mm,
	linux-kernel, lance.yang
In-Reply-To: <20260624082359.2869-1-richard.weiyang@gmail.com>

On Wed, Jun 24, 2026 at 08:23:59AM +0000, Wei Yang wrote:
> The kernel test robot reported a build failure on the parisc architecture
> when expanding HPAGE_PMD_NR in check_pmd().

Let me first say that I absolutely hate that we continue to support museum
piece architectures to the point that we have to make changes in core code
to accommodate them.

It's not unreasonable to ask retro people to either use older kernels or
make a downstream fork.

People having to think about this upstream is so incredibly silly. As if we
don't have enough work already...

Anyway, with that said...

>
>    mm/page_vma_mapped.c:142:13: note: in expansion of macro 'HPAGE_PMD_NR'
>      if ((pfn + HPAGE_PMD_NR - 1) < pvmw->pfn)
>                 ^~~~~~~~~~~~
>
> The config [1] in report link shows neither TRANSPARENT_HUGEPAGE nor
> HUGETLB_PAGE is defined. Then trigger the BUILD_BUG.
>
> Fix it by define check_pmd() under CONFIG_TRANSPARENT_HUGEPAGE.
>
> [1]: https://download.01.org/0day-ci/archive/20260624/202606240042.ffPsEXVc-lkp@intel.com/config

I think the fact this wasn't detected for 4 odd years goes to show how well
tested stuff on this arch is... (unless this is a very unusual
configuration at least).

>
> Fixes: 2aff7a4755be ("mm: Convert page_vma_mapped_walk to work on PFNs")
> Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202606240042.ffPsEXVc-lkp@intel.com/
> ---
>  mm/page_vma_mapped.c | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index 17dff8aab9f9..4aac94d9e8a9 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -136,6 +136,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
>  	return true;
>  }
>
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE

As per Andrew, this should be CONFIG_PGTABLE_HAS_HUGE_LEAVES I think.

I don't like that CONFIG_T..HP is taken to mean 'anything to do with leaf
page tables'. That's a mess and one we should unwind.

So don't make it worse, use CONFIG_PGTABLE_HAS_HUGE_LEAVES.

>  /* Returns true if the two ranges overlap.  Careful to not overflow. */
>  static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
>  {
> @@ -145,6 +146,12 @@ static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
>  		return false;
>  	return true;
>  }
> +#else
> +static bool check_pmd(unsigned long pfn, struct page_vma_mapped_walk *pvmw)
> +{
> +	return false;

Should have a WARN_ON_ONCE("bug in stupid arch") or similar here ;)

> +}
> +#endif
>
>  static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
>  {
> --
> 2.34.1
>

Thanks, Lorenzo


^ permalink raw reply

* Re: [RFC PATCH] mm: Avoiding split large folios if swap has no space
From: Johannes Weiner @ 2026-06-25 13:36 UTC (permalink / raw)
  To: David Hildenbrand (Arm)
  Cc: Barry Song, akpm, axelrasmussen, baolin.wang, dev.jain, kasong,
	lance.yang, liam, linux-kernel, linux-mm, ljs, npache, qi.zheng,
	ryan.roberts, shakeel.butt, weixugc, yuanchu, zhaonanzhe, ziy,
	Michal Hocko, Roman Gushchin
In-Reply-To: <c29f90c6-2075-43e8-8f0d-0d6718a0f124@kernel.org>

On Thu, Jun 25, 2026 at 09:49:56AM +0200, David Hildenbrand (Arm) wrote:
> >>
> >> But now I wonder whether we would also want to check "is there any free swap
> >> space", not just "is there any swap".
> > 
> > I don't quite understand you. get_nr_swap_pages() returns
> > nr_swap_pages, which increases or decreases as swap is allocated or
> > freed. I guess it just reflects how many swaps we currently have
> > available?
> 
> Indeed, I was confused by the function name it's "free swap pages". So all good :)
> 
> > 
> >>
> >>
> >> Essentially, try returning -E2BIG if there is the chance to swap out after
> >> split, and  -ENOSPC / -ENOMEM if a split wouldn't help.
> >>
> >>>       }
> >>>
> >>>  again:
> >>> @@ -1769,11 +1772,13 @@ int folio_alloc_swap(struct folio *folio)
> >>>       }
> >>>
> >>>       /* Need to call this even if allocation failed, for MEMCG_SWAP_FAIL. */
> >>> -     if (unlikely(mem_cgroup_try_charge_swap(folio)))
> >>> +     if (unlikely(mem_cgroup_try_charge_swap(folio))) {
> >>>               swap_cache_del_folio(folio);
> >>> +             return -ENOMEM;
> >>
> >> Here we wouldn't have the information whether we could charge after a split.
> >>
> >> So that would require a rework to signal this more cleanly to the caller.
> > 
> > Yep. The tricky part is that mem_cgroup_try_charge_swap() cannot
> > return how much swap quota is available in the memcg. Do you prefer to
> > add an output argument to mem_cgroup_try_charge_swap() to expose
> > that
> That would probably be cleanest, if that is easily possible. We would want to
> get memcg maintainer feedback on that.
> 
> @memcg folks: we'd like to know whether splitting a large folio would make
> mem_cgroup_try_charge_swap() succeed on a split (smaller) part, to distinguish
> "there is no way we can swap out anything, don't split" vs. "we could swap out,
> split".

It's technically doable, but is this worth the bother? The remaining
headroom is less than a large folio. You can split this one, but you
cannot even swap out all of its subpages anymore? From the cgroup
side, we don't need the limit to be obeyed this rigidly. We overcharge
temporarily in other places if it's convenient to do so. A fuzz factor
around the limit is acceptable.

But if you still want to do it, here is how:

The page_counter_try_charge() in __mem_cgroup_try_charge_swap() walks
the hierarchy upwards. If it fails, it will store the first level that
failed against its limit. You can do the mem_cgroup_margin() math
against this counter to determine headroom. An ancestor *could* be
more restrictive, so you need to finish the hierarchy walk to the root
and use the min() of all the swap.max - page_counter_read(swap). Then
return that in a return argument from __mem_cgroup_try_charge_swap().


^ permalink raw reply

* Re: [PATCH v5 8/9] dax/kmem: add sysfs interface for atomic whole-device hotplug
From: Gregory Price @ 2026-06-25 13:35 UTC (permalink / raw)
  To: David Hildenbrand (Arm)
  Cc: linux-mm, nvdimm, linux-kernel, linux-cxl, driver-core,
	linux-kselftest, kernel-team, osalvador, gregkh, rafael, dakr,
	djbw, vishal.l.verma, dave.jiang, akpm, ljs, liam, vbabka, rppt,
	surenb, mhocko, shuah, alison.schofield,
	Smita.KoralahalliChannabasappa, ira.weiny, apopple,
	Hannes Reinecke
In-Reply-To: <1d8f74a7-502b-43cb-a0f0-1923049aa213@kernel.org>

On Thu, Jun 25, 2026 at 09:40:02AM +0200, David Hildenbrand (Arm) wrote:
> >  Documentation/ABI/testing/sysfs-bus-dax |  26 +++
> >  drivers/base/memory.c                   |   9 +
> 
> Can we have this ...
> 
> >  drivers/dax/kmem.c                      | 224 ++++++++++++++++++++----
> >  include/linux/memory_hotplug.h          |   1 +
> > 
> 
> ... and this as a separate patch, please?
> 
> Nothing else jumped at me.
> 

ack

~Gregory


^ permalink raw reply

* Re: [PATCH v18 4/8] rust: page: convert to `Ownable`
From: Gary Guo @ 2026-06-25 13:32 UTC (permalink / raw)
  To: Andreas Hindborg, Danilo Krummrich, Lorenzo Stoakes,
	Vlastimil Babka, Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Alice Ryhl, Trevor Gross, Daniel Almeida, Tamir Duberstein,
	Alexandre Courbot, Onur Özkan, Lyude Paul,
	Greg Kroah-Hartman, Arve Hjønnevåg, Todd Kjos,
	Christian Brauner, Carlos Llamas, Rafael J. Wysocki, Dave Ertman,
	Ira Weiny, Leon Romanovsky, Paul Moore, Serge Hallyn,
	David Airlie, Simona Vetter, Alexander Viro, Jan Kara,
	Igor Korotin, Viresh Kumar, Nishanth Menon, Stephen Boyd,
	Bjorn Helgaas, Krzysztof Wilczyński, Pavel Tikhomirov,
	Michal Wilczynski
  Cc: Philipp Stanner, rust-for-linux, linux-kernel, linux-mm,
	driver-core, linux-block, linux-security-module, dri-devel,
	linux-fsdevel, linux-pm, linux-pci, linux-pwm, Asahi Lina
In-Reply-To: <20260625-unique-ref-v18-4-4e06b5896d47@kernel.org>

On Thu Jun 25, 2026 at 11:15 AM BST, Andreas Hindborg wrote:
> From: Asahi Lina <lina@asahilina.net>
> 
> This allows Page references to be returned as borrowed references,
> without necessarily owning the struct page.
> 
> Remove `BorrowedPage` and update users to use `Owned<Page>`.
> 
> Signed-off-by: Asahi Lina <lina@asahilina.net>
> [ Andreas: Fix formatting and add a safety comment, update users. ]
> Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>

Nice to see `BorrowedPage` going away.

Reviewed-by: Gary Guo <gary@garyguo.net>

> ---
>  drivers/android/binder/page_range.rs |  10 +--
>  rust/kernel/alloc/allocator.rs       |  19 +++---
>  rust/kernel/alloc/allocator/iter.rs  |   6 +-
>  rust/kernel/page.rs                  | 122 +++++++++--------------------------
>  4 files changed, 46 insertions(+), 111 deletions(-)



^ permalink raw reply

* Re: [PATCH v18 3/8] rust: implement `ForeignOwnable` for `Owned`
From: Gary Guo @ 2026-06-25 13:29 UTC (permalink / raw)
  To: Andreas Hindborg, Danilo Krummrich, Lorenzo Stoakes,
	Vlastimil Babka, Liam R. Howlett, Uladzislau Rezki, Miguel Ojeda,
	Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Alice Ryhl, Trevor Gross, Daniel Almeida, Tamir Duberstein,
	Alexandre Courbot, Onur Özkan, Lyude Paul,
	Greg Kroah-Hartman, Arve Hjønnevåg, Todd Kjos,
	Christian Brauner, Carlos Llamas, Rafael J. Wysocki, Dave Ertman,
	Ira Weiny, Leon Romanovsky, Paul Moore, Serge Hallyn,
	David Airlie, Simona Vetter, Alexander Viro, Jan Kara,
	Igor Korotin, Viresh Kumar, Nishanth Menon, Stephen Boyd,
	Bjorn Helgaas, Krzysztof Wilczyński, Pavel Tikhomirov,
	Michal Wilczynski
  Cc: Philipp Stanner, rust-for-linux, linux-kernel, linux-mm,
	driver-core, linux-block, linux-security-module, dri-devel,
	linux-fsdevel, linux-pm, linux-pci, linux-pwm
In-Reply-To: <20260625-unique-ref-v18-3-4e06b5896d47@kernel.org>

On Thu Jun 25, 2026 at 11:15 AM BST, Andreas Hindborg wrote:
> Implement `ForeignOwnable` for `Owned<T>`. This allows use of `Owned<T>` in
> places such as the `XArray`.
>
> Note that `T` does not need to implement `ForeignOwnable` for `Owned<T>` to
> implement `ForeignOwnable`.
>
> Signed-off-by: Andreas Hindborg <a.hindborg@kernel.org>
> ---
>  rust/kernel/owned.rs | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 53 insertions(+)
>
> diff --git a/rust/kernel/owned.rs b/rust/kernel/owned.rs
> index 7fe9ec3e55126..9c92d4a83cc1b 100644
> --- a/rust/kernel/owned.rs
> +++ b/rust/kernel/owned.rs
> @@ -15,6 +15,8 @@
>      ptr::NonNull, //
>  };
>  
> +use kernel::types::ForeignOwnable;
> +
>  /// Types that specify their own way of performing allocation and destruction. Typically, this trait
>  /// is implemented on types from the C side.
>  ///
> @@ -186,3 +188,54 @@ fn drop(&mut self) {
>          unsafe { T::release(self.ptr) };
>      }
>  }
> +
> +// SAFETY: We derive the pointer to `T` from a valid `T`, so the returned
> +// pointer satisfy alignment requirements of `T`.
> +unsafe impl<T: Ownable> ForeignOwnable for Owned<T> {
> +    const FOREIGN_ALIGN: usize = core::mem::align_of::<T>();
> +
> +    type Borrowed<'a>
> +        = &'a T
> +    where
> +        Self: 'a;
> +    type BorrowedMut<'a>
> +        = Pin<&'a mut T>
> +    where
> +        Self: 'a;
> +
> +    #[inline]
> +    fn into_foreign(self) -> *mut kernel::ffi::c_void {
> +        let ptr = self.ptr.as_ptr().cast();
> +        core::mem::forget(self);
> +        ptr

I think the pattern in `into_raw` is better:

    ManuallyDrop::new(self).ptr.as_ptr().cast()

Or perhaps this can just use `Self::into_raw(self).as_ptr().cast()`.

> +    }
> +
> +    #[inline]
> +    unsafe fn from_foreign(ptr: *mut kernel::ffi::c_void) -> Self {
> +        // INVARIANT: By the function safety contract, `ptr` was returned by `into_foreign`, which
> +        // gave up exclusive ownership of a valid, pinned `T`; we retake that ownership here.
> +        Self {
> +            // SAFETY: By function safety contract, `ptr` came from
> +            // `into_foreign` and cannot be null.
> +            ptr: unsafe { NonNull::new_unchecked(ptr.cast()) },
> +        }
> +    }

Same here, could be using `Self::from_raw`.

However, the current code looks correct to me regardless, so:

Reviewed-by: Gary Guo <gary@garyguo.net>

Best,
Gary

> +
> +    #[inline]
> +    unsafe fn borrow<'a>(ptr: *mut kernel::ffi::c_void) -> Self::Borrowed<'a> {
> +        // SAFETY: By function safety requirements, `ptr` is valid for use as a
> +        // reference for `'a`.
> +        unsafe { &*ptr.cast() }
> +    }
> +
> +    #[inline]
> +    unsafe fn borrow_mut<'a>(ptr: *mut kernel::ffi::c_void) -> Self::BorrowedMut<'a> {
> +        // SAFETY: By function safety requirements, `ptr` is valid for use as a
> +        // unique reference for `'a`.
> +        let inner = unsafe { &mut *ptr.cast() };
> +
> +        // SAFETY: We never move out of inner, and we do not hand out mutable
> +        // references when `T: !Unpin`.
> +        unsafe { Pin::new_unchecked(inner) }
> +    }
> +}




^ permalink raw reply

* Re: [Patch mm-hotfixes v4] mm/page_vma_mapped: fix device-private PMD handling
From: Lorenzo Stoakes @ 2026-06-25 13:12 UTC (permalink / raw)
  To: Lance Yang
  Cc: richard.weiyang, akpm, david, riel, liam, vbabka, harry, jannh,
	ziy, sj, balbirs, linux-mm, linux-kernel, stable
In-Reply-To: <20260624085756.6598-1-lance.yang@linux.dev>

On Wed, Jun 24, 2026 at 04:57:56PM +0800, Lance Yang wrote:
>
> On Wed, Jun 24, 2026 at 06:53:53AM +0000, Wei Yang wrote:
> >Commit 65edfda6f3f2 ("mm/rmap: extend rmap and migration support
> >device-private entries") introduced the concept of device-private
> >PMD entries, but did not correctly update the rmap walk code to
> >account for them.
> >
> >As a result, when page_vma_mapped_walk() encounters device-private
> >PMD entries, it takes no action other than to acquire the PMD lock
> >and exit.
> >
> >However this is highly problematic for two reasons - firstly,
> >device private entries possess a PFN so check_pmd() needs to be
> >called to ensure an overlapping PFN range.
> >
> >Secondly, and more importantly, if PVMW_MIGRATION is set the
> >caller assumes the returned entry is a migration entry, resulting
> >in memory corruption when the caller tries to interpret the device
> >private entry as such.
> >
> >In addition, commit 146287290023 ("mm/huge_memory: implement
> >device-private THP splitting") allowed device private PMDs to be
> >split like THP mappings, but again did not update this code path.
> >
> >As a result, we might race a PMD split prior to acquiring the PMD
> >lock.
> >
> >This patch addresses all of these issues by invoking check_pmd(),
> >ensuring PVMW_MIGRATION is not set and checks whether a split raced
> >us as we do for PMD THP and migration entries.
> >
> >Fixes: 65edfda6f3f2 ("mm/rmap: extend rmap and migration support device-private entries")
> >Cc: <stable@vger.kernel.org>
> >Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
> >Suggested-by: David Hildenbrand <david@kernel.org>
>
> Shouldn't we add
>
> Suggested-by: Lorenzo Stoakes <ljs@kernel.org>
>
> as well?
>
> v4 mostly follows Lorenzo's comments, code bits included. Feels only fair.

Thanks Lance :)

I'm kinda indifferent about it really, I'm really keen to ensure people sending
patches get the credit for their work, so if I send a patch in reply as a
shorthand for 'I think this might work better', I don't expect/require any
credit at all, it's just sometimes a quicker way of responding!

But if Wei wants to add a S-b that's fine by me also! :)

Cheers, Lorenzo


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox