* [RFC PATCH v2.1 1/3] mm/damon/reclaim: detect and use fresh enabled and kdamond_pid values
2026-04-18 22:27 [RFC PATCH v2.1 0/3] mm/damon/modules: detect and use fresh status SeongJae Park
@ 2026-04-18 22:27 ` SeongJae Park
2026-04-18 22:57 ` sashiko-bot
2026-04-18 22:27 ` [RFC PATCH v2.1 2/3] mm/damon/lru_sort: " SeongJae Park
2026-04-18 22:27 ` [RFC PATCH v2.1 3/3] mm/damon/stat: detect and use fresh enabled value SeongJae Park
2 siblings, 1 reply; 10+ messages in thread
From: SeongJae Park @ 2026-04-18 22:27 UTC (permalink / raw)
Cc: SeongJae Park, # 5 . 19 . x, Andrew Morton, damon, linux-kernel,
linux-mm, Liew Rui Yan
DAMON_RECLAIM updates 'enabled' and 'kdamond_pid' parameter values,
which represents the running status of its kdamond, when the user
explicitly requests start/stop of the kdamond. The kdamond can,
however, be stopped in events other than the explicit user request in
the following three events.
1. ctx->regions_score_histogram allocation failure at beginning of the
execution,
2. damon_commit_ctx() failure due to invalid user input, and
3. damon_commit_ctx() failure due to its internal allocation failures.
Hence, if the kdamond is stopped by the above three events, the values
of the status parameters can be stale. Users could show the stale
values and be confused. This is already bad, but the real consequence
is worse. DAMON_RECLAIM avoids unnecessary damon_start() and
damon_stop() calls based on the 'enabled' parameter value. And the
update of 'enabled' parameter value depends on the damon_start() and
damon_stop() call results. Hence, once the kdamond has stopped by the
unintentional events, the user cannot restart the kdamond before the
system reboot. For example, the issue can be reproduced via below
steps.
# cd /sys/module/damon_reclaim/parameters
#
# # start DAMON_RECLAIM
# echo Y > enabled
# ps -ef | grep kdamond
root 806 2 0 17:53 ? 00:00:00 [kdamond.0]
root 808 803 0 17:53 pts/4 00:00:00 grep kdamond
#
# # commit wrong input to stop kdamond withou explicit stop request
# echo 3 > addr_unit
# echo Y > commit_inputs
bash: echo: write error: Invalid argument
#
# # confirm kdamond is stopped
# ps -ef | grep kdamond
root 811 803 0 17:53 pts/4 00:00:00 grep kdamond
#
# # users casn now show stable status
# cat enabled
Y
# cat kdamond_pid
806
#
# # even after fixing the wrong parameter,
# # kdamond cannot be restarted.
# echo 1 > addr_unit
# echo Y > enabled
# ps -ef | grep kdamond
root 815 803 0 17:54 pts/4 00:00:00 grep kdamond
The problem will only rarely happen in real and common setups for the
following reasons. The allocation failures are unlikely in such setups
since those allocations are arguably too small to fail. Also sane users
on real production environments may not commit wrong input parameters.
But once it happens, the consequence is quite bad. And the bug is a
bug.
The issue stems from the fact that there are multiple events that can
change the status, and following all the events is challenging.
Dynamically detect and use the fresh status for the parameters when
those are requested.
Fixes: e035c280f6df ("mm/damon/reclaim: support online inputs update")
Cc: <stable@vger.kernel.org> # 5.19.x
Co-developed-by: Liew Rui Yan <aethernet65535@gmail.com>
Signed-off-by: Liew Rui Yan <aethernet65535@gmail.com>
Signed-off-by: SeongJae Park <sj@kernel.org>
---
mm/damon/reclaim.c | 85 ++++++++++++++++++++++++++++++----------------
1 file changed, 55 insertions(+), 30 deletions(-)
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index 86da147786583..fe7fce26cf6ce 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -144,15 +144,6 @@ static unsigned long addr_unit __read_mostly = 1;
static bool skip_anon __read_mostly;
module_param(skip_anon, bool, 0600);
-/*
- * PID of the DAMON thread
- *
- * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
- * Else, -1.
- */
-static int kdamond_pid __read_mostly = -1;
-module_param(kdamond_pid, int, 0400);
-
static struct damos_stat damon_reclaim_stat;
DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_reclaim_stat,
reclaim_tried_regions, reclaimed_regions, quota_exceeds);
@@ -288,12 +279,8 @@ static int damon_reclaim_turn(bool on)
{
int err;
- if (!on) {
- err = damon_stop(&ctx, 1);
- if (!err)
- kdamond_pid = -1;
- return err;
- }
+ if (!on)
+ return damon_stop(&ctx, 1);
err = damon_reclaim_apply_parameters();
if (err)
@@ -302,9 +289,6 @@ static int damon_reclaim_turn(bool on)
err = damon_start(&ctx, 1, true);
if (err)
return err;
- kdamond_pid = damon_kdamond_pid(ctx);
- if (kdamond_pid < 0)
- return kdamond_pid;
return damon_call(ctx, &call_control);
}
@@ -332,42 +316,83 @@ module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600);
MODULE_PARM_DESC(addr_unit,
"Scale factor for DAMON_RECLAIM to ops address conversion (default: 1)");
+static bool damon_reclaim_enabled(void)
+{
+ if (!ctx)
+ return false;
+ return damon_is_running(ctx);
+}
+
static int damon_reclaim_enabled_store(const char *val,
const struct kernel_param *kp)
{
- bool is_enabled = enabled;
- bool enable;
int err;
- err = kstrtobool(val, &enable);
+ err = kstrtobool(val, &enabled);
if (err)
return err;
- if (is_enabled == enable)
+ if (damon_reclaim_enabled() == enabled)
return 0;
/* Called before init function. The function will handle this. */
if (!damon_initialized())
- goto set_param_out;
+ return 0;
- err = damon_reclaim_turn(enable);
- if (err)
- return err;
+ return damon_reclaim_turn(enabled);
+}
-set_param_out:
- enabled = enable;
- return err;
+static int damon_reclaim_enabled_load(char *buffer,
+ const struct kernel_param *kp)
+{
+ return sprintf(buffer, "%c\n", damon_reclaim_enabled() ? 'Y' : 'N');
}
static const struct kernel_param_ops enabled_param_ops = {
.set = damon_reclaim_enabled_store,
- .get = param_get_bool,
+ .get = damon_reclaim_enabled_load,
};
module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
MODULE_PARM_DESC(enabled,
"Enable or disable DAMON_RECLAIM (default: disabled)");
+static int damon_reclaim_kdamond_pid_store(const char *val,
+ const struct kernel_param *kp)
+{
+ /*
+ * kdamond_pid is read-only, but kernel command line could write it.
+ * Do nothing here.
+ */
+ return 0;
+}
+
+static int damon_reclaim_kdamond_pid_load(char *buffer,
+ const struct kernel_param *kp)
+{
+ int kdamond_pid = -1;
+
+ if (ctx) {
+ kdamond_pid = damon_kdamond_pid(ctx);
+ if (kdamond_pid < 0)
+ kdamond_pid = -1;
+ }
+ return sprintf(buffer, "%d\n", kdamond_pid);
+}
+
+static const struct kernel_param_ops kdamond_pid_param_ops = {
+ .set = damon_reclaim_kdamond_pid_store,
+ .get = damon_reclaim_kdamond_pid_load,
+};
+
+/*
+ * PID of the DAMON thread
+ *
+ * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
+ * Else, -1.
+ */
+module_param_cb(kdamond_pid, &kdamond_pid_param_ops, NULL, 0400);
+
static int __init damon_reclaim_init(void)
{
int err;
--
2.47.3
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [RFC PATCH v2.1 1/3] mm/damon/reclaim: detect and use fresh enabled and kdamond_pid values
2026-04-18 22:27 ` [RFC PATCH v2.1 1/3] mm/damon/reclaim: detect and use fresh enabled and kdamond_pid values SeongJae Park
@ 2026-04-18 22:57 ` sashiko-bot
2026-04-19 1:47 ` SeongJae Park
0 siblings, 1 reply; 10+ messages in thread
From: sashiko-bot @ 2026-04-18 22:57 UTC (permalink / raw)
To: SeongJae Park; +Cc: damon
Sashiko AI review found 2 potential issue(s):
- [High] Missing context initialization check in the sysfs store handler can trigger a NULL pointer dereference if module initialization fails.
- [Medium] The `damon_reclaim_enabled_store` function parses user input directly into the global `enabled` variable before using it for control flow decisions, causing a Time-Of-Check to Time-Of-Use (TOCTOU) race condition.
--
commit 9d2c63ed13e63f6709b2752f9d974f5055f9d97e
Author: SeongJae Park <sj@kernel.org>
mm/damon/reclaim: detect and use fresh enabled and kdamond_pid values
DAMON_RECLAIM updates the enabled and kdamond_pid parameter values dynamically
to prevent stale values when the daemon is stopped unintentionally. This
ensures users see the fresh running status and can restart the daemon when
needed.
> diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
> index 86da147786583..fe7fce26cf6ce 100644
> --- a/mm/damon/reclaim.c
> +++ b/mm/damon/reclaim.c
[ ... ]
> @@ -332,42 +316,83 @@ module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600);
> MODULE_PARM_DESC(addr_unit,
> "Scale factor for DAMON_RECLAIM to ops address conversion (default: 1)");
>
> +static bool damon_reclaim_enabled(void)
> +{
> + if (!ctx)
> + return false;
> + return damon_is_running(ctx);
> +}
> +
> static int damon_reclaim_enabled_store(const char *val,
> const struct kernel_param *kp)
> {
> - bool is_enabled = enabled;
> - bool enable;
> int err;
>
> - err = kstrtobool(val, &enable);
> + err = kstrtobool(val, &enabled);
By parsing user input directly into the global enabled variable, does this
introduce a race condition?
If multiple threads write to the sysfs enabled file concurrently, thread A's
parsed input could be overwritten by thread B before thread A evaluates it
in damon_reclaim_turn(enabled).
Could this cause thread A to execute the opposite action of what was
requested while still returning success to the user?
> if (err)
> return err;
>
> - if (is_enabled == enable)
> + if (damon_reclaim_enabled() == enabled)
> return 0;
>
> /* Called before init function. The function will handle this. */
> if (!damon_initialized())
> - goto set_param_out;
> + return 0;
>
> - err = damon_reclaim_turn(enable);
> - if (err)
> - return err;
> + return damon_reclaim_turn(enabled);
Can this trigger a null pointer dereference if the module initialization
fails?
If damon_reclaim is built into the kernel and damon_reclaim_init() fails
(for example, due to an allocation failure), the global ctx pointer remains
null. Because it is a built-in module, the sysfs parameters remain exposed.
When a user writes 'Y' to the enabled file, damon_initialized() will pass
because the core DAMON subsystem initialized successfully during boot. The
code then calls damon_reclaim_turn(true) which passes the null ctx down to
damon_commit_ctx(ctx, param_ctx).
Should there be an explicit if (!ctx) validation before attempting to start
the reclaim process?
> -set_param_out:
> - enabled = enable;
> - return err;
> +}
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260418222758.39795-1-sj@kernel.org?part=1
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [RFC PATCH v2.1 1/3] mm/damon/reclaim: detect and use fresh enabled and kdamond_pid values
2026-04-18 22:57 ` sashiko-bot
@ 2026-04-19 1:47 ` SeongJae Park
0 siblings, 0 replies; 10+ messages in thread
From: SeongJae Park @ 2026-04-19 1:47 UTC (permalink / raw)
To: sashiko-bot; +Cc: SeongJae Park, damon
On Sat, 18 Apr 2026 22:57:57 +0000 sashiko-bot@kernel.org wrote:
TL; DR: no blocker for this patch.
> Sashiko AI review found 2 potential issue(s):
> - [High] Missing context initialization check in the sysfs store handler can
> trigger a NULL pointer dereference if module initialization fails.
Good catch, but not the fault of this patch. I will separately work on it.
> - [Medium] The `damon_reclaim_enabled_store` function parses user input
> directly into the global `enabled` variable before using it for control flow
> decisions, causing a Time-Of-Check to Time-Of-Use (TOCTOU) race condition.
Sashiko-claiming issue cannot happen. Read below for more details.
> --
>
> commit 9d2c63ed13e63f6709b2752f9d974f5055f9d97e
> Author: SeongJae Park <sj@kernel.org>
>
> mm/damon/reclaim: detect and use fresh enabled and kdamond_pid values
>
> DAMON_RECLAIM updates the enabled and kdamond_pid parameter values dynamically
> to prevent stale values when the daemon is stopped unintentionally. This
> ensures users see the fresh running status and can restart the daemon when
> needed.
>
> > diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
> > index 86da147786583..fe7fce26cf6ce 100644
> > --- a/mm/damon/reclaim.c
> > +++ b/mm/damon/reclaim.c
>
> [ ... ]
>
> > @@ -332,42 +316,83 @@ module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600);
> > MODULE_PARM_DESC(addr_unit,
> > "Scale factor for DAMON_RECLAIM to ops address conversion (default: 1)");
> >
> > +static bool damon_reclaim_enabled(void)
> > +{
> > + if (!ctx)
> > + return false;
> > + return damon_is_running(ctx);
> > +}
> > +
> > static int damon_reclaim_enabled_store(const char *val,
> > const struct kernel_param *kp)
> > {
> > - bool is_enabled = enabled;
> > - bool enable;
> > int err;
> >
> > - err = kstrtobool(val, &enable);
> > + err = kstrtobool(val, &enabled);
>
> By parsing user input directly into the global enabled variable, does this
> introduce a race condition?
>
> If multiple threads write to the sysfs enabled file concurrently, thread A's
> parsed input could be overwritten by thread B before thread A evaluates it
> in damon_reclaim_turn(enabled).
>
> Could this cause thread A to execute the opposite action of what was
> requested while still returning success to the user?
Sysfs writes are protected by kernel_param_lock, so such concurrent
user inputs cannot happen.
>
> > if (err)
> > return err;
> >
> > - if (is_enabled == enable)
> > + if (damon_reclaim_enabled() == enabled)
> > return 0;
> >
> > /* Called before init function. The function will handle this. */
> > if (!damon_initialized())
> > - goto set_param_out;
> > + return 0;
> >
> > - err = damon_reclaim_turn(enable);
> > - if (err)
> > - return err;
> > + return damon_reclaim_turn(enabled);
>
> Can this trigger a null pointer dereference if the module initialization
> fails?
>
> If damon_reclaim is built into the kernel and damon_reclaim_init() fails
> (for example, due to an allocation failure), the global ctx pointer remains
> null. Because it is a built-in module, the sysfs parameters remain exposed.
>
> When a user writes 'Y' to the enabled file, damon_initialized() will pass
> because the core DAMON subsystem initialized successfully during boot. The
> code then calls damon_reclaim_turn(true) which passes the null ctx down to
> damon_commit_ctx(ctx, param_ctx).
>
> Should there be an explicit if (!ctx) validation before attempting to start
> the reclaim process?
Good finding, but the bug was there even before this patch, so no blocker of
this patch. I will separately work on the bug.
Thanks,
SJ
[...]
^ permalink raw reply [flat|nested] 10+ messages in thread
* [RFC PATCH v2.1 2/3] mm/damon/lru_sort: detect and use fresh enabled and kdamond_pid values
2026-04-18 22:27 [RFC PATCH v2.1 0/3] mm/damon/modules: detect and use fresh status SeongJae Park
2026-04-18 22:27 ` [RFC PATCH v2.1 1/3] mm/damon/reclaim: detect and use fresh enabled and kdamond_pid values SeongJae Park
@ 2026-04-18 22:27 ` SeongJae Park
2026-04-18 23:29 ` sashiko-bot
2026-04-18 22:27 ` [RFC PATCH v2.1 3/3] mm/damon/stat: detect and use fresh enabled value SeongJae Park
2 siblings, 1 reply; 10+ messages in thread
From: SeongJae Park @ 2026-04-18 22:27 UTC (permalink / raw)
Cc: SeongJae Park, # 6 . 0 . x, Andrew Morton, damon, linux-kernel,
linux-mm, Liew Rui Yan
DAMON_LRU_SORT updates 'enabled' and 'kdamond_pid' parameter values,
which represents the running status of its kdamond, when the user
explicitly requests start/stop of the kdamond. The kdamond can,
however, be stopped in events other than the explicit user request in
the following three events.
1. ctx->regions_score_histogram allocation failure at beginning of the
execution,
2. damon_commit_ctx() failure due to invalid user input, and
3. damon_commit_ctx() failure due to its internal allocation failures.
Hence, if the kdamond is stopped by the above three events, the values
of the status parameters can be stale. Users could show the stale
values and be confused. This is already bad, but the real consequence
is worse. DAMON_LRU_SORT avoids unnecessary damon_start() and
damon_stop() calls based on the 'enabled' parameter value. And the
update of 'enabled' parameter value depends on the damon_start() and
damon_stop() call results. Hence, once the kdamond has stopped by the
unintentional events, the user cannot restart the kdamond before the
system reboot. For example, the issue can be reproduced via below
steps.
# cd /sys/module/damon_lru_sort/parameters
#
# # start DAMON_LRU_SORT
# echo Y > enabled
# ps -ef | grep kdamond
root 806 2 0 17:53 ? 00:00:00 [kdamond.0]
root 808 803 0 17:53 pts/4 00:00:00 grep kdamond
#
# # commit wrong input to stop kdamond withou explicit stop request
# echo 3 > addr_unit
# echo Y > commit_inputs
bash: echo: write error: Invalid argument
#
# # confirm kdamond is stopped
# ps -ef | grep kdamond
root 811 803 0 17:53 pts/4 00:00:00 grep kdamond
#
# # users casn now show stable status
# cat enabled
Y
# cat kdamond_pid
806
#
# # even after fixing the wrong parameter,
# # kdamond cannot be restarted.
# echo 1 > addr_unit
# echo Y > enabled
# ps -ef | grep kdamond
root 815 803 0 17:54 pts/4 00:00:00 grep kdamond
The problem will only rarely happen in real and common setups for the
following reasons. The allocation failures are unlikely in such setups
since those allocations are arguably too small to fail. Also sane users
on real production environments may not commit wrong input parameters.
But once it happens, the consequence is quite bad. And the bug is a
bug.
The issue stems from the fact that there are multiple events that can
change the status, and following all the events is challenging.
Dynamically detect and use the fresh status for the parameters when
those are requested.
Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting")
Cc: <stable@vger.kernel.org> # 6.0.x
Co-developed-by: Liew Rui Yan <aethernet65535@gmail.com>
Signed-off-by: Liew Rui Yan <aethernet65535@gmail.com>
Signed-off-by: SeongJae Park <sj@kernel.org>
---
mm/damon/lru_sort.c | 85 +++++++++++++++++++++++++++++----------------
1 file changed, 55 insertions(+), 30 deletions(-)
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index 554559d729760..8494040b1ee48 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -161,15 +161,6 @@ module_param(monitor_region_end, ulong, 0600);
*/
static unsigned long addr_unit __read_mostly = 1;
-/*
- * PID of the DAMON thread
- *
- * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread.
- * Else, -1.
- */
-static int kdamond_pid __read_mostly = -1;
-module_param(kdamond_pid, int, 0400);
-
static struct damos_stat damon_lru_sort_hot_stat;
DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_lru_sort_hot_stat,
lru_sort_tried_hot_regions, lru_sorted_hot_regions,
@@ -386,12 +377,8 @@ static int damon_lru_sort_turn(bool on)
{
int err;
- if (!on) {
- err = damon_stop(&ctx, 1);
- if (!err)
- kdamond_pid = -1;
- return err;
- }
+ if (!on)
+ return damon_stop(&ctx, 1);
err = damon_lru_sort_apply_parameters();
if (err)
@@ -400,9 +387,6 @@ static int damon_lru_sort_turn(bool on)
err = damon_start(&ctx, 1, true);
if (err)
return err;
- kdamond_pid = damon_kdamond_pid(ctx);
- if (kdamond_pid < 0)
- return kdamond_pid;
return damon_call(ctx, &call_control);
}
@@ -430,42 +414,83 @@ module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600);
MODULE_PARM_DESC(addr_unit,
"Scale factor for DAMON_LRU_SORT to ops address conversion (default: 1)");
+static bool damon_lru_sort_enabled(void)
+{
+ if (!ctx)
+ return false;
+ return damon_is_running(ctx);
+}
+
static int damon_lru_sort_enabled_store(const char *val,
const struct kernel_param *kp)
{
- bool is_enabled = enabled;
- bool enable;
int err;
- err = kstrtobool(val, &enable);
+ err = kstrtobool(val, &enabled);
if (err)
return err;
- if (is_enabled == enable)
+ if (damon_lru_sort_enabled() == enabled)
return 0;
/* Called before init function. The function will handle this. */
if (!damon_initialized())
- goto set_param_out;
+ return 0;
- err = damon_lru_sort_turn(enable);
- if (err)
- return err;
+ return damon_lru_sort_turn(enabled);
+}
-set_param_out:
- enabled = enable;
- return err;
+static int damon_lru_sort_enabled_load(char *buffer,
+ const struct kernel_param *kp)
+{
+ return sprintf(buffer, "%c\n", damon_lru_sort_enabled() ? 'Y' : 'N');
}
static const struct kernel_param_ops enabled_param_ops = {
.set = damon_lru_sort_enabled_store,
- .get = param_get_bool,
+ .get = damon_lru_sort_enabled_load,
};
module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
MODULE_PARM_DESC(enabled,
"Enable or disable DAMON_LRU_SORT (default: disabled)");
+static int damon_lru_sort_kdamond_pid_store(const char *val,
+ const struct kernel_param *kp)
+{
+ /*
+ * kdamond_pid is read-only, but kernel command line could write it.
+ * Do nothing here.
+ */
+ return 0;
+}
+
+static int damon_lru_sort_kdamond_pid_load(char *buffer,
+ const struct kernel_param *kp)
+{
+ int kdamond_pid = -1;
+
+ if (ctx) {
+ kdamond_pid = damon_kdamond_pid(ctx);
+ if (kdamond_pid < 0)
+ kdamond_pid = -1;
+ }
+ return sprintf(buffer, "%d\n", kdamond_pid);
+}
+
+static const struct kernel_param_ops kdamond_pid_param_ops = {
+ .set = damon_lru_sort_kdamond_pid_store,
+ .get = damon_lru_sort_kdamond_pid_load,
+};
+
+/*
+ * PID of the DAMON thread
+ *
+ * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread.
+ * Else, -1.
+ */
+module_param_cb(kdamond_pid, &kdamond_pid_param_ops, NULL, 0400);
+
static int __init damon_lru_sort_init(void)
{
int err;
--
2.47.3
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [RFC PATCH v2.1 2/3] mm/damon/lru_sort: detect and use fresh enabled and kdamond_pid values
2026-04-18 22:27 ` [RFC PATCH v2.1 2/3] mm/damon/lru_sort: " SeongJae Park
@ 2026-04-18 23:29 ` sashiko-bot
2026-04-19 1:49 ` SeongJae Park
0 siblings, 1 reply; 10+ messages in thread
From: sashiko-bot @ 2026-04-18 23:29 UTC (permalink / raw)
To: SeongJae Park; +Cc: damon
Sashiko AI review found 1 potential issue(s):
- [High] Writing to the `enabled` sysfs parameter causes a kernel panic if the module failed to initialize during boot.
--
commit fa970a77d3fa7186f23394f3a1cfedc7ae08380c
Author: SeongJae Park <sj@kernel.org>
Subject: mm/damon/lru_sort: detect and use fresh enabled and kdamond_pid values
This commit updates the module to dynamically query the running status for
the enabled and kdamond_pid parameters. This avoids stale parameter values
if the kdamond daemon stops due to unintentional events like allocation
failures.
Cc: <stable@vger.kernel.org> # 6.0.x
> diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
> index 554559d729760..8494040b1ee48 100644
> --- a/mm/damon/lru_sort.c
> +++ b/mm/damon/lru_sort.c
[ ... ]
> @@ -430,42 +414,83 @@ module_param_cb(addr_unit, &addr_unit_param_ops, &addr_unit, 0600);
[ ... ]
> static int damon_lru_sort_enabled_store(const char *val,
> const struct kernel_param *kp)
> {
> - bool is_enabled = enabled;
> - bool enable;
> int err;
>
> - err = kstrtobool(val, &enable);
> + err = kstrtobool(val, &enabled);
> if (err)
> return err;
>
> - if (is_enabled == enable)
> + if (damon_lru_sort_enabled() == enabled)
> return 0;
>
> /* Called before init function. The function will handle this. */
> if (!damon_initialized())
> - goto set_param_out;
> + return 0;
>
> - err = damon_lru_sort_turn(enable);
> - if (err)
> - return err;
> + return damon_lru_sort_turn(enabled);
Could writing to this parameter cause a kernel panic if the module failed to
initialize during boot?
If damon_lru_sort_init() fails, the static ctx variable remains NULL. Because
the sysfs parameters are registered early, they remain exposed and writable
even after an initialization failure.
Writing true to the enabled parameter bypasses the damon_initialized() check
since it only checks the core DAMON cache, not this module's ctx.
Execution would then follow this path:
damon_lru_sort_enabled_store()
damon_lru_sort_turn(true)
damon_lru_sort_apply_parameters()
damon_commit_ctx(ctx, param_ctx)
Since ctx is NULL, wouldn't damon_commit_ctx() crash when it tries to
dereference it? Should there be an explicit check for !ctx before attempting
to apply parameters or turn on the feature?
> +}
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260418222758.39795-1-sj@kernel.org?part=2
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [RFC PATCH v2.1 2/3] mm/damon/lru_sort: detect and use fresh enabled and kdamond_pid values
2026-04-18 23:29 ` sashiko-bot
@ 2026-04-19 1:49 ` SeongJae Park
0 siblings, 0 replies; 10+ messages in thread
From: SeongJae Park @ 2026-04-19 1:49 UTC (permalink / raw)
To: sashiko-bot; +Cc: SeongJae Park, damon
TL; DR: no blocker for this patch.
On Sat, 18 Apr 2026 23:29:31 +0000 sashiko-bot@kernel.org wrote:
> Sashiko AI review found 1 potential issue(s):
> - [High] Writing to the `enabled` sysfs parameter causes a kernel panic if the module failed to initialize during boot.
Good finding, but the bug is not caused by this patch. I will separately work
on the bug.
Thanks,
SJ
[...]
^ permalink raw reply [flat|nested] 10+ messages in thread
* [RFC PATCH v2.1 3/3] mm/damon/stat: detect and use fresh enabled value
2026-04-18 22:27 [RFC PATCH v2.1 0/3] mm/damon/modules: detect and use fresh status SeongJae Park
2026-04-18 22:27 ` [RFC PATCH v2.1 1/3] mm/damon/reclaim: detect and use fresh enabled and kdamond_pid values SeongJae Park
2026-04-18 22:27 ` [RFC PATCH v2.1 2/3] mm/damon/lru_sort: " SeongJae Park
@ 2026-04-18 22:27 ` SeongJae Park
2026-04-18 23:51 ` sashiko-bot
2 siblings, 1 reply; 10+ messages in thread
From: SeongJae Park @ 2026-04-18 22:27 UTC (permalink / raw)
Cc: SeongJae Park, # 6 . 17 . x, Andrew Morton, damon, linux-kernel,
linux-mm
DAMON_STAT updates 'enabled' parameter value, which represents the
running status of its kdamond, when the user explicitly requests
start/stop of the kdamond. The kdamond can, however, be stopped even if
the user explicitly requested the stop, if ctx->regions_score_histogram
allocation failure at beginning of the execution of the kdamond. Hence,
if the kdamond is stopped by the allocation failure, the value of the
parameter can be stale.
Users could show the stale value and be confused. The problem will only
rarely happen in real and common setups because the allocation is
arguably too small to fail. Also, unlike the similar bugs that are now
fixed in DAMON_RECLAIM and DAMON_LRU_SORT, kdamond can be restarted in
this case, because DAMON_STAT force-updates the enabled parameter value
for user inputs. The bug is a bug, though.
The issue stems from the fact that there are multiple events that can
change the status, and following all the events is challenging.
Dynamically detect and use the fresh status for the parameters when
those are requested.
The issue was dicovered [1] by Sashiko.
[1] https://lore.kernel.org/20260416040602.88665-1-sj@kernel.org
Fixes: 369c415e6073 ("mm/damon: introduce DAMON_STAT module")
Cc: <stable@vger.kernel.org> # 6.17.x
Signed-off-by: SeongJae Park <sj@kernel.org>
---
mm/damon/stat.c | 30 ++++++++++++++++++++----------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/mm/damon/stat.c b/mm/damon/stat.c
index 99ba346f9e325..3951b762cbddf 100644
--- a/mm/damon/stat.c
+++ b/mm/damon/stat.c
@@ -19,14 +19,17 @@
static int damon_stat_enabled_store(
const char *val, const struct kernel_param *kp);
+static int damon_stat_enabled_load(char *buffer,
+ const struct kernel_param *kp);
+
static const struct kernel_param_ops enabled_param_ops = {
.set = damon_stat_enabled_store,
- .get = param_get_bool,
+ .get = damon_stat_enabled_load,
};
static bool enabled __read_mostly = IS_ENABLED(
CONFIG_DAMON_STAT_ENABLED_DEFAULT);
-module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
+module_param_cb(enabled, &enabled_param_ops, NULL, 0600);
MODULE_PARM_DESC(enabled, "Enable of disable DAMON_STAT");
static unsigned long estimated_memory_bandwidth __read_mostly;
@@ -273,17 +276,23 @@ static void damon_stat_stop(void)
damon_stat_context = NULL;
}
+static bool damon_stat_enabled(void)
+{
+ if (!damon_stat_context)
+ return false;
+ return damon_is_running(damon_stat_context);
+}
+
static int damon_stat_enabled_store(
const char *val, const struct kernel_param *kp)
{
- bool is_enabled = enabled;
int err;
err = kstrtobool(val, &enabled);
if (err)
return err;
- if (is_enabled == enabled)
+ if (damon_stat_enabled() == enabled)
return 0;
if (!damon_initialized())
@@ -293,16 +302,17 @@ static int damon_stat_enabled_store(
*/
return 0;
- if (enabled) {
- err = damon_stat_start();
- if (err)
- enabled = false;
- return err;
- }
+ if (enabled)
+ return damon_stat_start();
damon_stat_stop();
return 0;
}
+static int damon_stat_enabled_load(char *buffer, const struct kernel_param *kp)
+{
+ return sprintf(buffer, "%c\n", damon_stat_enabled() ? 'Y' : 'N');
+}
+
static int __init damon_stat_init(void)
{
int err = 0;
--
2.47.3
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [RFC PATCH v2.1 3/3] mm/damon/stat: detect and use fresh enabled value
2026-04-18 22:27 ` [RFC PATCH v2.1 3/3] mm/damon/stat: detect and use fresh enabled value SeongJae Park
@ 2026-04-18 23:51 ` sashiko-bot
2026-04-19 1:52 ` SeongJae Park
0 siblings, 1 reply; 10+ messages in thread
From: sashiko-bot @ 2026-04-18 23:51 UTC (permalink / raw)
To: SeongJae Park; +Cc: damon
Sashiko AI review found 1 potential issue(s):
- [High] Bypassing damon_stat_stop() and overwriting damon_stat_context causes a memory leak when explicitly disabling or restarting a kdamond thread that stopped internally.
--
commit b9703c85b8e9f48ea8dc673102aa2ecc0e1ac448
Author: SeongJae Park <sj@kernel.org>
mm/damon/stat: detect and use fresh enabled value
This commit dynamically detects the status of kdamond instead of relying on
the saved parameter value to avoid showing stale data when the daemon stops
internally.
Link: https://lore.kernel.org/20260416040602.88665-1-sj@kernel.org
> diff --git a/mm/damon/stat.c b/mm/damon/stat.c
> index 99ba346f9e325..3951b762cbddf 100644
> --- a/mm/damon/stat.c
> +++ b/mm/damon/stat.c
[ ... ]
> @@ -273,17 +276,23 @@ static void damon_stat_stop(void)
> damon_stat_context = NULL;
> }
>
> +static bool damon_stat_enabled(void)
> +{
> + if (!damon_stat_context)
> + return false;
> + return damon_is_running(damon_stat_context);
> +}
> +
> static int damon_stat_enabled_store(
> const char *val, const struct kernel_param *kp)
> {
> - bool is_enabled = enabled;
> int err;
>
> err = kstrtobool(val, &enabled);
> if (err)
> return err;
>
> - if (is_enabled == enabled)
> + if (damon_stat_enabled() == enabled)
> return 0;
Does this early return cause a memory leak if kdamond stops internally?
If kdamond stops internally, damon_is_running() will return false, causing
damon_stat_enabled() to also return false. However, damon_stat_context
remains allocated.
If a user subsequently writes 'N' to the enabled parameter to clean up,
damon_stat_enabled() == enabled evaluates to false == false. This triggers
the early return here, bypassing damon_stat_stop() which would have
called damon_destroy_ctx(damon_stat_context) to free the memory.
Similarly, if a user writes 'Y' to restart the module, damon_stat_start()
will be called. Looking at damon_stat_start(), it allocates a new context
by calling damon_stat_build_ctx() and overwrites the damon_stat_context
pointer without freeing the previous one.
Can this leak the context in both scenarios?
>
> if (!damon_initialized())
> @@ -293,16 +302,17 @@ static int damon_stat_enabled_store(
> */
> return 0;
>
> - if (enabled) {
> - err = damon_stat_start();
> - if (err)
> - enabled = false;
> - return err;
> - }
> + if (enabled)
> + return damon_stat_start();
> damon_stat_stop();
> return 0;
> }
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260418222758.39795-1-sj@kernel.org?part=3
^ permalink raw reply [flat|nested] 10+ messages in thread