* [RFC PATCH v3 1/4] mm/damon/core: add code borrowed from report-based monitoring work
2026-04-23 0:42 [RFC PATCH v3 0/4] mm/damon: introduce perf event based access check Akinobu Mita
@ 2026-04-23 0:42 ` Akinobu Mita
2026-04-23 1:21 ` sashiko-bot
2026-04-23 0:42 ` [RFC PATCH v3 2/4] mm/damon/core: add common code for perf event based access check Akinobu Mita
` (3 subsequent siblings)
4 siblings, 1 reply; 14+ messages in thread
From: Akinobu Mita @ 2026-04-23 0:42 UTC (permalink / raw)
To: damon; +Cc: linux-perf-users, sj, akinobu.mita
The perf event based monitoring will be implemented on top of the
report-based monitoring infrastructure [1], but since that work is
still in progress, only borrow the following necessary pieces for now:
* struct damon_access_report
* access_reported field in struct damon_region
* sample/primitives/ sysfs directory
[1] https://lore.kernel.org/linux-mm/20251208062943.68824-6-sj@kernel.org/T/
---
include/linux/damon.h | 68 ++++++
mm/damon/core.c | 1 +
mm/damon/sysfs.c | 499 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 568 insertions(+)
diff --git a/include/linux/damon.h b/include/linux/damon.h
index f2cdb7c3f5e6..9ba26d8a0bce 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -85,6 +85,7 @@ struct damon_region {
unsigned int age;
/* private: Internal value for age calculation. */
unsigned int last_nr_accesses;
+ bool access_reported;
};
/**
@@ -112,6 +113,30 @@ struct damon_target {
bool obsolete;
};
+/**
+ * struct damon_access_report - Represent single access report information.
+ * @paddr: Start physical address of the accessed address range.
+ * @vaddr: Start virtual address of the accessed address range.
+ * @size: The size of the accessed address range.
+ * @cpu: The id of the CPU that made the access.
+ * @tid: The task id of the task that made the access.
+ * @is_write: Whether the access is write.
+ *
+ * Any DAMON API caller that detects access events can report the information
+ * to DAMON using damon_report_access(). This struct contains the reporting
+ * information. Refer to damon_report_access() for more details.
+ */
+struct damon_access_report {
+ unsigned long paddr;
+ unsigned long vaddr;
+ unsigned long size;
+ unsigned int cpu;
+ pid_t tid;
+ bool is_write;
+/* private: */
+ unsigned long report_jiffies; /* when this report is made */
+};
+
/**
* enum damos_action - Represents an action of a Data Access Monitoring-based
* Operation Scheme.
@@ -763,6 +788,49 @@ struct damon_attrs {
unsigned long aggr_samples;
};
+/**
+ * enum damon_sample_filter_type - Type of &struct damon_sample_filter.
+ *
+ * @DAMON_FILTER_TYPE_CPUMASK: Filter by access-generated CPUs.
+ * @DAMON_FILTER_TYPE_THREADS: Filter by access-generated threads.
+ * @DAMON_FILTER_TYPE_WRITE: Filter by whether the access was for writing.
+ *
+ * Read &struct damon_sample_control for more details.
+ */
+enum damon_sample_filter_type {
+ DAMON_FILTER_TYPE_CPUMASK,
+ DAMON_FILTER_TYPE_THREADS,
+ DAMON_FILTER_TYPE_WRITE,
+};
+
+/**
+ * struct damon_sample_filter - &struct damon_access_report filter.
+ *
+ * @type: The type of this filter.
+ * @matching: Whether it is for condition-matching reports.
+ * @allow: Whether to include or exclude the @matching reports.
+ * @cpumask: Access-generated CPUs if @type is DAMON_FILTER_TYPE_CPUMASK.
+ * @tid_arr: Array of access-generated thread ids, if @type is
+ * DAMON_FILTER_TYPE_THREADS.
+ * @nr_tids: Size of @tid_arr, if @type is DAMON_FILTER_TYPE_THREADS.
+ * @list: List head for siblings.
+ *
+ * Read &struct damon_sample_control for more details.
+ */
+struct damon_sample_filter {
+ enum damon_sample_filter_type type;
+ bool matching;
+ bool allow;
+ union {
+ cpumask_t cpumask;
+ struct {
+ pid_t *tid_arr;
+ int nr_tids;
+ };
+ };
+ struct list_head list;
+};
+
/**
* struct damon_ctx - Represents a context for each monitoring. This is the
* main interface that allows users to set the attributes and get the results
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 3dbbbfdeff71..a52b2962aa22 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -142,6 +142,7 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end)
region->age = 0;
region->last_nr_accesses = 0;
+ region->access_reported = false;
return region;
}
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index eefa959aa30a..c19556f2af3b 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -747,6 +747,483 @@ static const struct kobj_type damon_sysfs_intervals_ktype = {
.default_groups = damon_sysfs_intervals_groups,
};
+/*
+ * access check report filter directory
+ */
+
+struct damon_sysfs_sample_filter {
+ struct kobject kobj;
+ enum damon_sample_filter_type type;
+ bool matching;
+ bool allow;
+ cpumask_t cpumask;
+};
+
+static struct damon_sysfs_sample_filter *damon_sysfs_sample_filter_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_sample_filter), GFP_KERNEL);
+}
+
+struct damon_sysfs_sample_filter_type_name {
+ enum damon_sample_filter_type type;
+ char *name;
+};
+
+static const struct damon_sysfs_sample_filter_type_name
+damon_sysfs_sample_filter_type_names[] = {
+ {
+ .type = DAMON_FILTER_TYPE_CPUMASK,
+ .name = "cpumask",
+ },
+};
+
+static ssize_t type_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_sample_filter *filter = container_of(kobj,
+ struct damon_sysfs_sample_filter, kobj);
+ int i = 0;
+
+ for (; i < ARRAY_SIZE(damon_sysfs_sample_filter_type_names); i++) {
+ const struct damon_sysfs_sample_filter_type_name *type_name;
+
+ type_name = &damon_sysfs_sample_filter_type_names[i];
+ if (type_name->type == filter->type)
+ return sysfs_emit(buf, "%s\n", type_name->name);
+ }
+ return -EINVAL;
+}
+
+static ssize_t type_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_sample_filter *filter = container_of(kobj,
+ struct damon_sysfs_sample_filter, kobj);
+ ssize_t ret = -EINVAL;
+ int i = 0;
+
+ for (; i < ARRAY_SIZE(damon_sysfs_sample_filter_type_names); i++) {
+ const struct damon_sysfs_sample_filter_type_name *type_name;
+
+ type_name = &damon_sysfs_sample_filter_type_names[i];
+ if (sysfs_streq(buf, type_name->name)) {
+ filter->type = type_name->type;
+ ret = count;
+ break;
+ }
+ }
+ return ret;
+}
+
+static ssize_t matching_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_sample_filter *filter = container_of(kobj,
+ struct damon_sysfs_sample_filter, kobj);
+
+ return sysfs_emit(buf, "%c\n", filter->matching ? 'Y' : 'N');
+}
+
+static ssize_t matching_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_sample_filter *filter = container_of(kobj,
+ struct damon_sysfs_sample_filter, kobj);
+ bool matching;
+ int err = kstrtobool(buf, &matching);
+
+ if (err)
+ return err;
+
+ filter->matching = matching;
+ return count;
+}
+
+static ssize_t allow_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_sample_filter *filter = container_of(kobj,
+ struct damon_sysfs_sample_filter, kobj);
+
+ return sysfs_emit(buf, "%c\n", filter->allow ? 'Y' : 'N');
+}
+
+static ssize_t allow_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_sample_filter *filter = container_of(kobj,
+ struct damon_sysfs_sample_filter, kobj);
+ bool allow;
+ int err = kstrtobool(buf, &allow);
+
+ if (err)
+ return err;
+
+ filter->allow = allow;
+ return count;
+}
+
+static ssize_t cpumask_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct damon_sysfs_sample_filter *filter = container_of(kobj,
+ struct damon_sysfs_sample_filter, kobj);
+
+ return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(&filter->cpumask));
+}
+
+static ssize_t cpumask_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct damon_sysfs_sample_filter *filter = container_of(kobj,
+ struct damon_sysfs_sample_filter, kobj);
+ cpumask_t cpumask;
+ int err = cpulist_parse(buf, &cpumask);
+
+ if (err)
+ return err;
+ filter->cpumask = cpumask;
+ return count;
+}
+
+static void damon_sysfs_sample_filter_release(struct kobject *kobj)
+{
+ struct damon_sysfs_sample_filter *filter = container_of(kobj,
+ struct damon_sysfs_sample_filter, kobj);
+
+ kfree(filter);
+}
+
+static struct kobj_attribute damon_sysfs_sample_filter_type_attr =
+ __ATTR_RW_MODE(type, 0600);
+
+static struct kobj_attribute damon_sysfs_sample_filter_matching_attr =
+ __ATTR_RW_MODE(matching, 0600);
+
+static struct kobj_attribute damon_sysfs_sample_filter_allow_attr =
+ __ATTR_RW_MODE(allow, 0600);
+
+static struct kobj_attribute damon_sysfs_sample_filter_cpumask_attr =
+ __ATTR_RW_MODE(cpumask, 0600);
+
+static struct attribute *damon_sysfs_sample_filter_attrs[] = {
+ &damon_sysfs_sample_filter_type_attr.attr,
+ &damon_sysfs_sample_filter_matching_attr.attr,
+ &damon_sysfs_sample_filter_allow_attr.attr,
+ &damon_sysfs_sample_filter_cpumask_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_sample_filter);
+
+static const struct kobj_type damon_sysfs_sample_filter_ktype = {
+ .release = damon_sysfs_sample_filter_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_sample_filter_groups,
+};
+
+/*
+ * access check report filters directory
+ */
+
+struct damon_sysfs_sample_filters {
+ struct kobject kobj;
+ struct damon_sysfs_sample_filter **filters_arr;
+ int nr;
+};
+
+static struct damon_sysfs_sample_filters *
+damon_sysfs_sample_filters_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_sample_filters), GFP_KERNEL);
+}
+
+static void damon_sysfs_sample_filters_rm_dirs(
+ struct damon_sysfs_sample_filters *filters)
+{
+ struct damon_sysfs_sample_filter **filters_arr = filters->filters_arr;
+ int i;
+
+ for (i = 0; i < filters->nr; i++)
+ kobject_put(&filters_arr[i]->kobj);
+ filters->nr = 0;
+ kfree(filters_arr);
+ filters->filters_arr = NULL;
+}
+
+static int damon_sysfs_sample_filters_add_dirs(
+ struct damon_sysfs_sample_filters *filters, int nr_filters)
+{
+ struct damon_sysfs_sample_filter **filters_arr, *filter;
+ int err, i;
+
+ damon_sysfs_sample_filters_rm_dirs(filters);
+ if (!nr_filters)
+ return 0;
+
+ filters_arr = kmalloc_array(nr_filters, sizeof(*filters_arr),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!filters_arr)
+ return -ENOMEM;
+ filters->filters_arr = filters_arr;
+
+ for (i = 0; i < nr_filters; i++) {
+ filter = damon_sysfs_sample_filter_alloc();
+ if (!filter) {
+ damon_sysfs_sample_filters_rm_dirs(filters);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&filter->kobj,
+ &damon_sysfs_sample_filter_ktype,
+ &filters->kobj, "%d", i);
+ if (err) {
+ kobject_put(&filter->kobj);
+ damon_sysfs_sample_filters_rm_dirs(filters);
+ return err;
+ }
+
+ filters_arr[i] = filter;
+ filters->nr++;
+ }
+ return 0;
+}
+
+static ssize_t nr_filters_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_sample_filters *filters = container_of(kobj,
+ struct damon_sysfs_sample_filters, kobj);
+
+ return sysfs_emit(buf, "%d\n", filters->nr);
+}
+
+static ssize_t nr_filters_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_sample_filters *filters;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ filters = container_of(kobj, struct damon_sysfs_sample_filters, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_sample_filters_add_dirs(filters, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_sample_filters_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_sample_filters, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_sample_filters_nr_attr =
+ __ATTR_RW_MODE(nr_filters, 0600);
+
+static struct attribute *damon_sysfs_sample_filters_attrs[] = {
+ &damon_sysfs_sample_filters_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_sample_filters);
+
+static const struct kobj_type damon_sysfs_sample_filters_ktype = {
+ .release = damon_sysfs_sample_filters_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_sample_filters_groups,
+};
+
+/*
+ * access check primitives directory
+ */
+
+struct damon_sysfs_primitives {
+ struct kobject kobj;
+ bool page_table;
+ bool page_fault;
+};
+
+static struct damon_sysfs_primitives *damon_sysfs_primitives_alloc(
+ bool page_table, bool page_fault)
+{
+ struct damon_sysfs_primitives *primitives = kmalloc(
+ sizeof(*primitives), GFP_KERNEL);
+
+ if (!primitives)
+ return NULL;
+
+ primitives->kobj = (struct kobject){};
+ primitives->page_table = page_table;
+ primitives->page_fault = page_fault;
+ return primitives;
+}
+
+static ssize_t page_table_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_primitives *primitives = container_of(kobj,
+ struct damon_sysfs_primitives, kobj);
+
+ return sysfs_emit(buf, "%c\n", primitives->page_table ? 'Y' : 'N');
+}
+
+static ssize_t page_table_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_primitives *primitives = container_of(kobj,
+ struct damon_sysfs_primitives, kobj);
+ bool enable;
+ int err = kstrtobool(buf, &enable);
+
+ if (err)
+ return err;
+ primitives->page_table = enable;
+ return count;
+}
+
+static ssize_t page_fault_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_primitives *primitives = container_of(kobj,
+ struct damon_sysfs_primitives, kobj);
+
+ return sysfs_emit(buf, "%c\n", primitives->page_fault ? 'Y' : 'N');
+}
+
+static ssize_t page_fault_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_primitives *primitives = container_of(kobj,
+ struct damon_sysfs_primitives, kobj);
+ bool enable;
+ int err = kstrtobool(buf, &enable);
+
+ if (err)
+ return err;
+ primitives->page_fault = enable;
+ return count;
+}
+
+static void damon_sysfs_primitives_release(struct kobject *kobj)
+{
+ struct damon_sysfs_primitives *primitives = container_of(kobj,
+ struct damon_sysfs_primitives, kobj);
+
+ kfree(primitives);
+}
+
+static struct kobj_attribute damon_sysfs_primitives_page_table_attr =
+ __ATTR_RW_MODE(page_table, 0600);
+
+static struct kobj_attribute damon_sysfs_primitives_page_fault_attr =
+ __ATTR_RW_MODE(page_fault, 0600);
+
+static struct attribute *damon_sysfs_primitives_attrs[] = {
+ &damon_sysfs_primitives_page_table_attr.attr,
+ &damon_sysfs_primitives_page_fault_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_primitives);
+
+static const struct kobj_type damon_sysfs_primitives_ktype = {
+ .release = damon_sysfs_primitives_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_primitives_groups,
+};
+
+/*
+ * sample directory
+ */
+
+struct damon_sysfs_sample {
+ struct kobject kobj;
+ struct damon_sysfs_primitives *primitives;
+ struct damon_sysfs_sample_filters *filters;
+};
+
+static struct damon_sysfs_sample *damon_sysfs_sample_alloc(void)
+{
+ struct damon_sysfs_sample *sample = kmalloc(
+ sizeof(*sample), GFP_KERNEL);
+
+ if (!sample)
+ return NULL;
+ sample->kobj = (struct kobject){};
+ return sample;
+}
+
+static int damon_sysfs_sample_add_dirs(
+ struct damon_sysfs_sample *sample)
+{
+ struct damon_sysfs_primitives *primitives;
+ struct damon_sysfs_sample_filters *filters;
+ int err;
+
+ primitives = damon_sysfs_primitives_alloc(true, false);
+ if (!primitives)
+ return -ENOMEM;
+ err = kobject_init_and_add(&primitives->kobj,
+ &damon_sysfs_primitives_ktype, &sample->kobj,
+ "primitives");
+ if (err)
+ goto put_primitives_out;
+ sample->primitives = primitives;
+
+ filters = damon_sysfs_sample_filters_alloc();
+ if (!filters) {
+ err = -ENOMEM;
+ goto put_primitives_out;
+ }
+ err = kobject_init_and_add(&filters->kobj,
+ &damon_sysfs_sample_filters_ktype, &sample->kobj,
+ "filters");
+ if (err)
+ goto put_filters_out;
+ sample->filters = filters;
+ return 0;
+put_filters_out:
+ kobject_put(&filters->kobj);
+ sample->filters = NULL;
+put_primitives_out:
+ kobject_put(&primitives->kobj);
+ sample->primitives = NULL;
+ return err;
+}
+
+static void damon_sysfs_sample_rm_dirs(
+ struct damon_sysfs_sample *sample)
+{
+ if (sample->primitives)
+ kobject_put(&sample->primitives->kobj);
+ if (sample->filters) {
+ damon_sysfs_sample_filters_rm_dirs(sample->filters);
+ kobject_put(&sample->filters->kobj);
+ }
+}
+
+static void damon_sysfs_sample_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_sample, kobj));
+}
+
+static struct attribute *damon_sysfs_sample_attrs[] = {
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_sample);
+
+static const struct kobj_type damon_sysfs_sample_ktype = {
+ .release = damon_sysfs_sample_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_sample_groups,
+};
+
/*
* monitoring_attrs directory
*/
@@ -755,6 +1232,7 @@ struct damon_sysfs_attrs {
struct kobject kobj;
struct damon_sysfs_intervals *intervals;
struct damon_sysfs_ul_range *nr_regions_range;
+ struct damon_sysfs_sample *sample;
};
static struct damon_sysfs_attrs *damon_sysfs_attrs_alloc(void)
@@ -771,6 +1249,7 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs)
{
struct damon_sysfs_intervals *intervals;
struct damon_sysfs_ul_range *nr_regions_range;
+ struct damon_sysfs_sample *sample;
int err;
intervals = damon_sysfs_intervals_alloc(5000, 100000, 60000000);
@@ -799,8 +1278,26 @@ static int damon_sysfs_attrs_add_dirs(struct damon_sysfs_attrs *attrs)
if (err)
goto put_nr_regions_intervals_out;
attrs->nr_regions_range = nr_regions_range;
+
+ sample = damon_sysfs_sample_alloc();
+ if (!sample) {
+ err = -ENOMEM;
+ goto put_nr_regions_intervals_out;
+ }
+ err = kobject_init_and_add(&sample->kobj,
+ &damon_sysfs_sample_ktype, &attrs->kobj,
+ "sample");
+ if (err)
+ goto put_sample_out;
+ err = damon_sysfs_sample_add_dirs(sample);
+ if (err)
+ goto put_sample_out;
+ attrs->sample = sample;
return 0;
+put_sample_out:
+ kobject_put(&sample->kobj);
+ attrs->sample = NULL;
put_nr_regions_intervals_out:
kobject_put(&nr_regions_range->kobj);
attrs->nr_regions_range = NULL;
@@ -817,6 +1314,8 @@ static void damon_sysfs_attrs_rm_dirs(struct damon_sysfs_attrs *attrs)
kobject_put(&attrs->nr_regions_range->kobj);
damon_sysfs_intervals_rm_dirs(attrs->intervals);
kobject_put(&attrs->intervals->kobj);
+ damon_sysfs_sample_rm_dirs(attrs->sample);
+ kobject_put(&attrs->sample->kobj);
}
static void damon_sysfs_attrs_release(struct kobject *kobj)
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [RFC PATCH v3 1/4] mm/damon/core: add code borrowed from report-based monitoring work
2026-04-23 0:42 ` [RFC PATCH v3 1/4] mm/damon/core: add code borrowed from report-based monitoring work Akinobu Mita
@ 2026-04-23 1:21 ` sashiko-bot
0 siblings, 0 replies; 14+ messages in thread
From: sashiko-bot @ 2026-04-23 1:21 UTC (permalink / raw)
To: Akinobu Mita; +Cc: damon, linux-perf-users
Sashiko AI review found 2 potential issue(s):
- [Medium] Stack bloat / stack overflow risk from `cpumask_t` allocation on the stack.
- [Medium] Data race and potential torn reads from lockless assignment of `cpumask_t`.
--
commit 9c32a9c6100e762865883bc9bf4b2272ccb6dedb
Author: Akinobu Mita <akinobu.mita@gmail.com>
mm/damon/core: add code borrowed from report-based monitoring work
This commit borrows code from the report-based monitoring infrastructure
to support perf event-based monitoring. It brings in struct
damon_access_report, an access_reported field in damon_region, and the
sample/primitives sysfs directory structure.
Link: https://lore.kernel.org/linux-mm/20251208062943.68824-6-sj@kernel.org/T/
> diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
> index eefa959aa30ae..c19556f2af3bb 100644
> --- a/mm/damon/sysfs.c
> +++ b/mm/damon/sysfs.c
[ ... ]
> +static ssize_t cpumask_store(struct kobject *kobj, struct kobj_attribute *attr,
> + const char *buf, size_t count)
> +{
> + struct damon_sysfs_sample_filter *filter = container_of(kobj,
> + struct damon_sysfs_sample_filter, kobj);
> + cpumask_t cpumask;
Is it safe to allocate cpumask_t on the stack here?
The size of cpumask_t scales with CONFIG_NR_CPUS. On systems with a large
number of CPUs, this could consume a significant amount of the kernel stack
and potentially risk a stack overflow. Should this use alloc_cpumask_var()
instead to dynamically allocate the temporary mask?
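For example, one possible shape of that change, as an untested sketch (only
the parsing and assignment part of cpumask_store() would change):

	cpumask_var_t cpumask;
	int err;

	if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
		return -ENOMEM;
	err = cpulist_parse(buf, cpumask);
	if (err) {
		free_cpumask_var(cpumask);
		return err;
	}
	cpumask_copy(&filter->cpumask, cpumask);
	free_cpumask_var(cpumask);
	return count;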
> + int err = cpulist_parse(buf, &cpumask);
> +
> + if (err)
> + return err;
> + filter->cpumask = cpumask;
Can this lockless assignment cause a data race or torn reads?
Because cpumask_t can be larger than a single word, this assignment may
compile into a non-atomic multi-word copy like memcpy(). If a concurrent
sysfs read in cpumask_show() or a DAMON commit operation accesses
filter->cpumask while this copy is ongoing, could it read a partially
updated, torn mask?
Should this be protected by explicit synchronization, such as
damon_sysfs_lock?
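If so, reusing the mutex_trylock(&damon_sysfs_lock) pattern that
nr_filters_store() in this patch already uses might be enough; a sketch:

	if (!mutex_trylock(&damon_sysfs_lock))
		return -EBUSY;
	cpumask_copy(&filter->cpumask, &cpumask);
	mutex_unlock(&damon_sysfs_lock);
	return count;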
> + return count;
> +}
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260423004211.7037-1-akinobu.mita@gmail.com?part=1
^ permalink raw reply [flat|nested] 14+ messages in thread
* [RFC PATCH v3 2/4] mm/damon/core: add common code for perf event based access check
2026-04-23 0:42 [RFC PATCH v3 0/4] mm/damon: introduce perf event based access check Akinobu Mita
2026-04-23 0:42 ` [RFC PATCH v3 1/4] mm/damon/core: add code borrowed from report-based monitoring work Akinobu Mita
@ 2026-04-23 0:42 ` Akinobu Mita
2026-04-23 1:58 ` sashiko-bot
2026-04-23 0:42 ` [RFC PATCH v3 3/4] mm/damon/vaddr: support " Akinobu Mita
` (2 subsequent siblings)
4 siblings, 1 reply; 14+ messages in thread
From: Akinobu Mita @ 2026-04-23 0:42 UTC (permalink / raw)
To: damon; +Cc: linux-perf-users, sj, akinobu.mita
This patch creates the necessary infrastructure (structure definitions,
functions, sysfs interfaces) to support perf event based access checks
for both physical and virtual address space monitoring.
Perf events can be added through the following sysfs interface:
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/nr_perf_events
Description: Writing a number 'N' to this file creates the number of
directories for controlling each perf event named '0' to 'N-1' under the
perf_events/ directory.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/type
Description: Writing to and reading from this file sets and gets the
"type" field of struct perf_event_attr for perf event based access
check.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/config
Description: Same as above.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/config1
Description: Same as above.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/config2
Description: Same as above.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/sample_freq
Description: Same as above.
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/sample_phys_addr
Description: Writing to and reading from this file sets and gets the
PERF_SAMPLE_PHYS_ADDR bitfield in the "sample_type" field of struct
perf_event_attr for perf event based access check.
The perf event that can be specified for the perf event based access
check must be able to obtain the data source address corresponding to
the sample.
One way to find such perf event settings is to run `perf mem record -vv`
and look for the dump of the perf_event_attr structure that is passed to
the perf_event_open() system call in the debug output.
If at least one perf event is specified, the perf event based access
check will be enabled instead of the existing PTE accessed-bit based
access check. In other words, if no perf event is specified, the
existing PTE accessed-bit based access check will be used.
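Roughly, the per-event sysfs files above are turned into a struct
perf_event_attr by damon_perf_init() (described below). The snippet here
is only an illustration with made-up type/config/sample_freq values; the
authoritative mapping, including the additional sample_type flags, is
the damon_perf_init() code added by this patch:

	struct perf_event_attr attr = {
		.size		= sizeof(attr),
		.type		= 0x4,		/* .../<P>/type (placeholder) */
		.config		= 0x1cd,	/* .../<P>/config (placeholder) */
		.sample_freq	= 1000,		/* .../<P>/sample_freq (placeholder) */
		.freq		= 1,
		.sample_type	= PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |
				  PERF_SAMPLE_PHYS_ADDR, /* only if .../<P>/sample_phys_addr is set */
		.precise_ip	= 2,
		.disabled	= 1,
	};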
Add the following common functions required for both physical and
virtual address space monitoring:
damon_perf_init()
Each perf event specified by the sysfs interface is initialized with
perf_event_create_kernel_counter().
damon_perf_cleanup()
Release each initialized perf event with perf_event_release_kernel().
damon_perf_prepare_access_checks()
Enable each initialized perf event with perf_event_enable().
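For reference, the intended call sequence from the kdamond side, simplified
from the core.c changes in this patch and the next one:

	/* at kdamond start (kdamond_init_ctx()), for each listed event */
	damon_perf_init(ctx, event);

	/* before each sampling interval */
	damon_perf_prepare_access_checks(ctx, event);

	/* at context teardown (damon_perf_destroy()) */
	damon_perf_cleanup(ctx, event);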
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
---
.../ABI/testing/sysfs-kernel-mm-damon | 8 +
include/linux/damon.h | 34 ++
mm/damon/core.c | 67 ++++
mm/damon/ops-common.h | 29 ++
mm/damon/sysfs.c | 377 +++++++++++++++++-
mm/damon/vaddr.c | 158 ++++++++
6 files changed, 669 insertions(+), 4 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon
index 2424237ebb10..aa69ce09789a 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-damon
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -576,3 +576,11 @@ Contact: SeongJae Park <sj@kernel.org>
Description: Reading this file returns the size of the memory in the region
that passed DAMON operations layer-handled filters of the
scheme in bytes.
+
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/nr_perf_events
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/type
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/config
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/config1
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/config2
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/sample_phys_addr
+What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/sample/primitives/perf_events/<P>/sample_freq
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 9ba26d8a0bce..2e7b5939f350 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -120,6 +120,7 @@ struct damon_target {
* @size: The size of the accessed address range.
* @cpu: The id of the CPU that made the access.
* @tid: The task id of the task that made the access.
+ * @tgid: The task group id of the task that made the access.
* @is_write: Whether the access is write.
*
* Any DAMON API callers that notified access events can report the information
@@ -132,6 +133,7 @@ struct damon_access_report {
unsigned long size;
unsigned int cpu;
pid_t tid;
+ pid_t tgid;
bool is_write;
/* private: */
unsigned long report_jiffies; /* when this report is made */
@@ -831,6 +833,36 @@ struct damon_sample_filter {
struct list_head list;
};
+/**
+ * struct damon_perf_event_attr - raw PMU event attr for access check
+ *
+ * @type: raw PMU event type for access check
+ * @config: raw PMU event config for access check
+ * @config1: raw PMU event config1 for access check
+ * @config2: raw PMU event config2 for access check
+ * @sample_phys_addr: raw PMU event PERF_SAMPLE_PHYS_ADDR in sample_type for access check
+ * @sample_freq: raw PMU event sample_freq for access check
+ */
+struct damon_perf_event_attr {
+ u32 type;
+ u64 config;
+ u64 config1;
+ u64 config2;
+ bool sample_phys_addr;
+ u64 sample_freq;
+};
+
+/**
+ * struct damon_perf_event - perf event for access check
+ *
+ * @priv: Monitoring operations-specific data
+ */
+struct damon_perf_event {
+ struct damon_perf_event_attr attr;
+ void *priv;
+ struct list_head list;
+};
+
/**
* struct damon_ctx - Represents a context for each monitoring. This is the
* main interface that allows users to set the attributes and get the results
@@ -855,6 +887,7 @@ struct damon_sample_filter {
* @ops: Set of monitoring operations for given use cases.
* @addr_unit: Scale factor for core to ops address conversion.
* @min_region_sz: Minimum region size.
+ * @perf_events: Head of perf events (&damon_perf_event) list.
* @adaptive_targets: Head of monitoring targets (&damon_target) list.
* @schemes: Head of schemes (&damos) list.
*/
@@ -909,6 +942,7 @@ struct damon_ctx {
unsigned long addr_unit;
unsigned long min_region_sz;
+ struct list_head perf_events;
struct list_head adaptive_targets;
struct list_head schemes;
};
diff --git a/mm/damon/core.c b/mm/damon/core.c
index a52b2962aa22..220b105482da 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -20,6 +20,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/damon.h>
+#include "ops-common.h"
+
static DEFINE_MUTEX(damon_lock);
static int nr_running_ctxs;
static bool running_exclusive_ctxs;
@@ -605,6 +607,7 @@ struct damon_ctx *damon_new_ctx(void)
ctx->addr_unit = 1;
ctx->min_region_sz = DAMON_MIN_REGION_SZ;
+ INIT_LIST_HEAD(&ctx->perf_events);
INIT_LIST_HEAD(&ctx->adaptive_targets);
INIT_LIST_HEAD(&ctx->schemes);
@@ -619,6 +622,19 @@ static void damon_destroy_targets(struct damon_ctx *ctx)
damon_destroy_target(t, ctx);
}
+static void damon_perf_destroy(struct damon_ctx *ctx)
+{
+ while (!list_empty(&ctx->perf_events)) {
+ struct damon_perf_event *event =
+ list_first_entry(&ctx->perf_events, typeof(*event), list);
+
+ damon_perf_cleanup(ctx, event);
+ list_del(&event->list);
+ kfree(event);
+ }
+
+}
+
void damon_destroy_ctx(struct damon_ctx *ctx)
{
struct damos *s, *next_s;
@@ -628,6 +644,8 @@ void damon_destroy_ctx(struct damon_ctx *ctx)
damon_for_each_scheme_safe(s, next_s, ctx)
damon_destroy_scheme(s);
+ damon_perf_destroy(ctx);
+
kfree(ctx);
}
@@ -1311,6 +1329,42 @@ static int damon_commit_targets(
return 0;
}
+static int damon_commit_perf_events(struct damon_ctx *dst, struct damon_ctx *src)
+{
+ struct damon_perf_event *src_event, *new_event;
+ int err = 0;
+
+ damon_perf_destroy(dst);
+
+ list_for_each_entry(src_event, &src->perf_events, list) {
+ new_event = kzalloc_obj(*new_event);
+ if (!new_event) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ new_event->attr.type = src_event->attr.type;
+ new_event->attr.config = src_event->attr.config;
+ new_event->attr.config1 = src_event->attr.config1;
+ new_event->attr.config2 = src_event->attr.config2;
+ new_event->attr.sample_phys_addr = src_event->attr.sample_phys_addr;
+ new_event->attr.sample_freq = src_event->attr.sample_freq;
+
+ if (damon_is_running(dst)) {
+ err = damon_perf_init(dst, new_event);
+ if (err) {
+ kfree(new_event);
+ goto out;
+ }
+ }
+ list_add_tail(&new_event->list, &dst->perf_events);
+ }
+ return 0;
+out:
+ damon_perf_destroy(dst);
+ return err;
+}
+
/**
* damon_commit_ctx() - Commit parameters of a DAMON context to another.
* @dst: The commit destination DAMON context.
@@ -1336,6 +1390,9 @@ int damon_commit_ctx(struct damon_ctx *dst, struct damon_ctx *src)
if (err)
return err;
err = damon_commit_targets(dst, src);
+ if (err)
+ return err;
+ err = damon_commit_perf_events(dst, src);
if (err)
return err;
/*
@@ -2933,6 +2990,7 @@ static void kdamond_init_ctx(struct damon_ctx *ctx)
unsigned long sample_interval = ctx->attrs.sample_interval ?
ctx->attrs.sample_interval : 1;
struct damos *scheme;
+ struct damon_perf_event *event, *next;
ctx->passed_sample_intervals = 0;
ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval;
@@ -2945,6 +3003,15 @@ static void kdamond_init_ctx(struct damon_ctx *ctx)
damos_set_next_apply_sis(scheme, ctx);
damos_set_filters_default_reject(scheme);
}
+
+ list_for_each_entry_safe(event, next, &ctx->perf_events, list) {
+ int err = damon_perf_init(ctx, event);
+
+ if (err) {
+ list_del(&event->list);
+ kfree(event);
+ }
+ }
}
/*
diff --git a/mm/damon/ops-common.h b/mm/damon/ops-common.h
index 5efa5b5970de..e28c5afab7f0 100644
--- a/mm/damon/ops-common.h
+++ b/mm/damon/ops-common.h
@@ -23,3 +23,32 @@ bool damos_folio_filter_match(struct damos_filter *filter, struct folio *folio);
unsigned long damon_migrate_pages(struct list_head *folio_list, int target_nid);
bool damos_ops_has_filter(struct damos *s);
+
+#ifdef CONFIG_PERF_EVENTS
+
+void damon_perf_prepare_access_checks(struct damon_ctx *ctx, struct damon_perf_event *event);
+int damon_perf_init(struct damon_ctx *ctx, struct damon_perf_event *event);
+void damon_perf_cleanup(struct damon_ctx *ctx, struct damon_perf_event *event);
+
+struct damon_perf {
+ struct perf_event * __percpu *event;
+ struct damon_perf_buffer __percpu *buffer;
+};
+
+#else /* CONFIG_PERF_EVENTS */
+
+static inline void damon_perf_prepare_access_checks(struct damon_ctx *ctx,
+ struct damon_perf_event *event)
+{
+}
+
+static inline int damon_perf_init(struct damon_ctx *ctx, struct damon_perf_event *event)
+{
+ return 0;
+}
+
+static inline void damon_perf_cleanup(struct damon_ctx *ctx, struct damon_perf_event *event)
+{
+}
+
+#endif /* CONFIG_PERF_EVENTS */
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index c19556f2af3b..9a3b42d241d4 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -747,6 +747,309 @@ static const struct kobj_type damon_sysfs_intervals_ktype = {
.default_groups = damon_sysfs_intervals_groups,
};
+/*
+ * perf_event_attr directory
+ */
+
+struct damon_sysfs_perf_event_attr {
+ struct kobject kobj;
+ u32 type;
+ u64 config;
+ u64 config1;
+ u64 config2;
+ bool sample_phys_addr;
+ u64 sample_freq;
+};
+
+static struct damon_sysfs_perf_event_attr *damon_sysfs_perf_event_attr_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_perf_event_attr), GFP_KERNEL);
+}
+
+static ssize_t attr_type_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+
+ return sysfs_emit(buf, "0x%x\n", perf_event_attr->type);
+}
+
+static ssize_t attr_type_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+ int err = kstrtou32(buf, 0, &perf_event_attr->type);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t config_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+
+ return sysfs_emit(buf, "0x%llx\n", perf_event_attr->config);
+}
+
+static ssize_t config_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+ int err = kstrtou64(buf, 0, &perf_event_attr->config);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t config1_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+
+ return sysfs_emit(buf, "0x%llx\n", perf_event_attr->config1);
+}
+
+static ssize_t config1_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+ int err = kstrtou64(buf, 0, &perf_event_attr->config1);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t config2_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+
+ return sysfs_emit(buf, "0x%llx\n", perf_event_attr->config2);
+}
+
+static ssize_t config2_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+ int err = kstrtou64(buf, 0, &perf_event_attr->config2);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static ssize_t sample_phys_addr_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+
+ return sysfs_emit(buf, "%d\n", perf_event_attr->sample_phys_addr);
+}
+
+static ssize_t sample_phys_addr_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+ bool sample_phys_addr;
+ int err = kstrtobool(buf, &sample_phys_addr);
+
+ if (err)
+ return -EINVAL;
+
+ perf_event_attr->sample_phys_addr = sample_phys_addr;
+ return count;
+}
+
+static ssize_t sample_freq_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+
+ return sysfs_emit(buf, "%llu\n", perf_event_attr->sample_freq);
+}
+
+static ssize_t sample_freq_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_perf_event_attr *perf_event_attr = container_of(kobj,
+ struct damon_sysfs_perf_event_attr, kobj);
+ int err = kstrtou64(buf, 0, &perf_event_attr->sample_freq);
+
+ if (err)
+ return -EINVAL;
+ return count;
+}
+
+static void damon_sysfs_perf_event_attr_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_perf_event_attr, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_perf_event_attr_type_attr =
+ __ATTR(type, 0600, attr_type_show, attr_type_store);
+
+static struct kobj_attribute damon_sysfs_perf_event_attr_config_attr =
+ __ATTR_RW_MODE(config, 0600);
+
+static struct kobj_attribute damon_sysfs_perf_event_attr_config1_attr =
+ __ATTR_RW_MODE(config1, 0600);
+
+static struct kobj_attribute damon_sysfs_perf_event_attr_config2_attr =
+ __ATTR_RW_MODE(config2, 0600);
+
+static struct kobj_attribute damon_sysfs_perf_event_attr_sample_phys_addr_attr =
+ __ATTR_RW_MODE(sample_phys_addr, 0600);
+
+static struct kobj_attribute damon_sysfs_perf_event_attr_sample_freq_attr =
+ __ATTR_RW_MODE(sample_freq, 0600);
+
+static struct attribute *damon_sysfs_perf_event_attr_attrs[] = {
+ &damon_sysfs_perf_event_attr_type_attr.attr,
+ &damon_sysfs_perf_event_attr_config_attr.attr,
+ &damon_sysfs_perf_event_attr_config1_attr.attr,
+ &damon_sysfs_perf_event_attr_config2_attr.attr,
+ &damon_sysfs_perf_event_attr_sample_phys_addr_attr.attr,
+ &damon_sysfs_perf_event_attr_sample_freq_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_perf_event_attr);
+
+static const struct kobj_type damon_sysfs_perf_event_attr_ktype = {
+ .release = damon_sysfs_perf_event_attr_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_perf_event_attr_groups,
+};
+
+/*
+ * perf_events directory
+ */
+
+struct damon_sysfs_perf_events {
+ struct kobject kobj;
+ struct damon_sysfs_perf_event_attr **attrs_arr;
+ int nr;
+};
+
+static struct damon_sysfs_perf_events *damon_sysfs_perf_events_alloc(void)
+{
+ return kzalloc(sizeof(struct damon_sysfs_perf_events), GFP_KERNEL);
+}
+
+static void damon_sysfs_perf_events_rm_dirs(struct damon_sysfs_perf_events *events)
+{
+ struct damon_sysfs_perf_event_attr **attrs_arr = events->attrs_arr;
+ int i;
+
+ for (i = 0; i < events->nr; i++)
+ kobject_put(&attrs_arr[i]->kobj);
+ events->nr = 0;
+ kfree(attrs_arr);
+ events->attrs_arr = NULL;
+}
+
+static int damon_sysfs_perf_events_add_dirs(struct damon_sysfs_perf_events *events,
+ int nr_events)
+{
+ struct damon_sysfs_perf_event_attr **attrs_arr, *attr;
+ int err, i;
+
+ damon_sysfs_perf_events_rm_dirs(events);
+ if (!nr_events)
+ return 0;
+
+ attrs_arr = kmalloc_array(nr_events, sizeof(*attrs_arr), GFP_KERNEL);
+ if (!attrs_arr)
+ return -ENOMEM;
+ events->attrs_arr = attrs_arr;
+
+ for (i = 0; i < nr_events; i++) {
+ attr = damon_sysfs_perf_event_attr_alloc();
+ if (!attr) {
+ damon_sysfs_perf_events_rm_dirs(events);
+ return -ENOMEM;
+ }
+
+ err = kobject_init_and_add(&attr->kobj,
+ &damon_sysfs_perf_event_attr_ktype, &events->kobj,
+ "%d", i);
+ if (err) {
+ kobject_put(&attr->kobj);
+ damon_sysfs_perf_events_rm_dirs(events);
+ return err;
+ }
+ attrs_arr[i] = attr;
+ events->nr++;
+ }
+ return 0;
+}
+
+static ssize_t nr_perf_events_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct damon_sysfs_perf_events *events = container_of(kobj,
+ struct damon_sysfs_perf_events, kobj);
+
+ return sysfs_emit(buf, "%d\n", events->nr);
+}
+
+static ssize_t nr_perf_events_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t count)
+{
+ struct damon_sysfs_perf_events *events;
+ int nr, err = kstrtoint(buf, 0, &nr);
+
+ if (err)
+ return err;
+ if (nr < 0)
+ return -EINVAL;
+
+ events = container_of(kobj, struct damon_sysfs_perf_events, kobj);
+
+ if (!mutex_trylock(&damon_sysfs_lock))
+ return -EBUSY;
+ err = damon_sysfs_perf_events_add_dirs(events, nr);
+ mutex_unlock(&damon_sysfs_lock);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static void damon_sysfs_perf_events_release(struct kobject *kobj)
+{
+ kfree(container_of(kobj, struct damon_sysfs_perf_events, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_perf_events_nr_attr =
+ __ATTR_RW_MODE(nr_perf_events, 0600);
+
+static struct attribute *damon_sysfs_perf_events_attrs[] = {
+ &damon_sysfs_perf_events_nr_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_perf_events);
+
+static const struct kobj_type damon_sysfs_perf_events_ktype = {
+ .release = damon_sysfs_perf_events_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = damon_sysfs_perf_events_groups,
+};
+
/*
* access check report filter directory
*/
@@ -1048,6 +1351,7 @@ struct damon_sysfs_primitives {
struct kobject kobj;
bool page_table;
bool page_fault;
+ struct damon_sysfs_perf_events *perf_events;
};
static struct damon_sysfs_primitives *damon_sysfs_primitives_alloc(
@@ -1159,6 +1463,24 @@ static struct damon_sysfs_sample *damon_sysfs_sample_alloc(void)
return sample;
}
+static int damon_sysfs_context_set_perf_events(struct damon_sysfs_primitives
+ *primitives)
+{
+ struct damon_sysfs_perf_events *events = damon_sysfs_perf_events_alloc();
+ int err;
+
+ if (!events)
+ return -ENOMEM;
+ err = kobject_init_and_add(&events->kobj, &damon_sysfs_perf_events_ktype,
+ &primitives->kobj, "perf_events");
+ if (err) {
+ kobject_put(&events->kobj);
+ return err;
+ }
+ primitives->perf_events = events;
+ return 0;
+}
+
static int damon_sysfs_sample_add_dirs(
struct damon_sysfs_sample *sample)
{
@@ -1176,10 +1498,14 @@ static int damon_sysfs_sample_add_dirs(
goto put_primitives_out;
sample->primitives = primitives;
+ err = damon_sysfs_context_set_perf_events(primitives);
+ if (err)
+ goto put_primitives_out;
+
filters = damon_sysfs_sample_filters_alloc();
if (!filters) {
err = -ENOMEM;
- goto put_primitives_out;
+ goto put_perf_events_out;
}
err = kobject_init_and_add(&filters->kobj,
&damon_sysfs_sample_filters_ktype, &sample->kobj,
@@ -1191,6 +1517,9 @@ static int damon_sysfs_sample_add_dirs(
put_filters_out:
kobject_put(&filters->kobj);
sample->filters = NULL;
+put_perf_events_out:
+ kobject_put(&primitives->perf_events->kobj);
+ primitives->perf_events = NULL;
put_primitives_out:
kobject_put(&primitives->kobj);
sample->primitives = NULL;
@@ -1200,8 +1529,11 @@ static int damon_sysfs_sample_add_dirs(
static void damon_sysfs_sample_rm_dirs(
struct damon_sysfs_sample *sample)
{
- if (sample->primitives)
+ if (sample->primitives) {
+ damon_sysfs_perf_events_rm_dirs(sample->primitives->perf_events);
+ kobject_put(&sample->primitives->perf_events->kobj);
kobject_put(&sample->primitives->kobj);
+ }
if (sample->filters) {
damon_sysfs_sample_filters_rm_dirs(sample->filters);
kobject_put(&sample->filters->kobj);
@@ -1450,10 +1782,10 @@ static int damon_sysfs_context_add_dirs(struct damon_sysfs_context *context)
err = damon_sysfs_context_set_schemes(context);
if (err)
- goto put_targets_attrs_out;
+ goto put_targets_out;
return 0;
-put_targets_attrs_out:
+put_targets_out:
kobject_put(&context->targets->kobj);
context->targets = NULL;
rmdir_put_attrs_out:
@@ -1929,6 +2261,40 @@ static int damon_sysfs_add_targets(struct damon_ctx *ctx,
return 0;
}
+static int damon_sysfs_add_perf_event(struct damon_sysfs_perf_event_attr *sys_attr,
+ struct damon_ctx *ctx)
+{
+ struct damon_perf_event *event = kzalloc(sizeof(*event), GFP_KERNEL);
+
+ if (!event)
+ return -ENOMEM;
+
+ event->attr.type = sys_attr->type;
+ event->attr.config = sys_attr->config;
+ event->attr.config1 = sys_attr->config1;
+ event->attr.config2 = sys_attr->config2;
+ event->attr.sample_phys_addr = sys_attr->sample_phys_addr;
+ event->attr.sample_freq = sys_attr->sample_freq;
+
+ list_add_tail(&event->list, &ctx->perf_events);
+ return 0;
+}
+
+static int damon_sysfs_add_perf_events(struct damon_ctx *ctx,
+ struct damon_sysfs_perf_events *sysfs_perf_events)
+{
+ int i, err;
+
+ for (i = 0; i < sysfs_perf_events->nr; i++) {
+ struct damon_sysfs_perf_event_attr *attr = sysfs_perf_events->attrs_arr[i];
+
+ err = damon_sysfs_add_perf_event(attr, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
/*
* damon_sysfs_upd_schemes_stats() - Update schemes stats sysfs files.
* @data: The kobject wrapper that associated to the kdamond thread.
@@ -1970,6 +2336,9 @@ static int damon_sysfs_apply_inputs(struct damon_ctx *ctx,
ctx->min_region_sz = max(
DAMON_MIN_REGION_SZ / sys_ctx->addr_unit, 1);
err = damon_sysfs_set_attrs(ctx, sys_ctx->attrs);
+ if (err)
+ return err;
+ err = damon_sysfs_add_perf_events(ctx, sys_ctx->attrs->sample->primitives->perf_events);
if (err)
return err;
err = damon_sysfs_add_targets(ctx, sys_ctx->targets);
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index b069dbc7e3d2..1534372e7637 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -15,6 +15,9 @@
#include <linux/pagewalk.h>
#include <linux/sched/mm.h>
+#include <linux/circ_buf.h>
+#include <linux/perf_event.h>
+
#include "../internal.h"
#include "ops-common.h"
@@ -936,6 +939,161 @@ static int damon_va_scheme_score(struct damon_ctx *context,
return DAMOS_MAX_SCORE;
}
+#ifdef CONFIG_PERF_EVENTS
+
+struct damon_perf_buffer {
+ struct damon_access_report *reports;
+ unsigned long head;
+ unsigned long tail;
+ unsigned long size;
+};
+
+static void damon_perf_overflow(struct perf_event *perf_event, struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct damon_perf_event *event = perf_event->overflow_handler_context;
+ struct damon_perf *perf = event->priv;
+ struct damon_perf_buffer *buffer = per_cpu_ptr(perf->buffer, smp_processor_id());
+ unsigned long head = buffer->head;
+ unsigned long tail = READ_ONCE(buffer->tail);
+
+ if (CIRC_SPACE(head, tail, buffer->size) >= 1) {
+ struct damon_access_report *report = &buffer->reports[head];
+
+ report->tid = task_pid_nr(current);
+ report->tgid = task_tgid_nr(current);
+ report->vaddr = data->addr;
+ report->paddr = data->phys_addr;
+
+ smp_store_release(&buffer->head, (head + 1) & (buffer->size - 1));
+ }
+}
+
+#define DAMON_PERF_MAX_RECORDS (1UL << 20)
+#define DAMON_PERF_INIT_RECORDS (1UL << 15)
+
+int damon_perf_init(struct damon_ctx *ctx, struct damon_perf_event *event)
+{
+ struct damon_perf *perf;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_RAW,
+ .size = sizeof(attr),
+ .type = event->attr.type,
+ .config = event->attr.config,
+ .config1 = event->attr.config1,
+ .config2 = event->attr.config2,
+ .sample_freq = event->attr.sample_freq,
+ .freq = 1,
+ .sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR |
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
+ (event->attr.sample_phys_addr ? PERF_SAMPLE_PHYS_ADDR : 0) |
+ PERF_SAMPLE_WEIGHT_STRUCT,
+ .precise_ip = 2,
+ .pinned = 1,
+ .disabled = 1,
+ };
+ int cpu;
+ int err = -ENOMEM;
+ bool found = false;
+
+ perf = kzalloc_obj(*perf);
+ if (!perf)
+ return -ENOMEM;
+
+ perf->event = alloc_percpu(typeof(*perf->event));
+ if (!perf->event)
+ goto free_percpu;
+
+ perf->buffer = alloc_percpu(typeof(*perf->buffer));
+ if (!perf->buffer)
+ goto free_percpu;
+
+ for_each_possible_cpu(cpu) {
+ struct perf_event *perf_event;
+ struct damon_perf_buffer *buffer = per_cpu_ptr(perf->buffer, cpu);
+
+ perf_event = perf_event_create_kernel_counter(&attr, cpu, NULL,
+ damon_perf_overflow, event);
+ if (IS_ERR(perf_event)) {
+ err = PTR_ERR(perf_event);
+ if (err == -ENODEV)
+ continue;
+ pr_err("perf event create on CPU %d failed with %d\n", cpu, err);
+ goto free_for_each_cpu;
+ }
+ found = true;
+ *per_cpu_ptr(perf->event, cpu) = perf_event;
+
+ buffer->size = DAMON_PERF_INIT_RECORDS;
+ buffer->reports = kvcalloc_node(buffer->size, sizeof(buffer->reports[0]),
+ GFP_KERNEL, cpu_to_node(cpu));
+ if (!buffer->reports)
+ goto free_for_each_cpu;
+ }
+ event->priv = perf;
+
+ return found ? 0 : -ENODEV;
+
+free_for_each_cpu:
+ for_each_possible_cpu(cpu) {
+ struct perf_event *perf_event = per_cpu(*perf->event, cpu);
+ struct damon_perf_buffer *buffer = per_cpu_ptr(perf->buffer, cpu);
+
+ if (perf_event)
+ perf_event_release_kernel(perf_event);
+ kvfree(buffer->reports);
+ }
+free_percpu:
+ free_percpu(perf->event);
+ free_percpu(perf->buffer);
+ kfree(perf);
+
+ return err;
+}
+
+void damon_perf_cleanup(struct damon_ctx *ctx, struct damon_perf_event *event)
+{
+ struct damon_perf *perf = event->priv;
+ int cpu;
+
+ if (!perf)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct perf_event *perf_event = per_cpu(*perf->event, cpu);
+ struct damon_perf_buffer *buffer = per_cpu_ptr(perf->buffer, cpu);
+
+ if (!perf_event)
+ continue;
+ perf_event_disable(perf_event);
+ perf_event_release_kernel(perf_event);
+ kvfree(buffer->reports);
+ }
+ free_percpu(perf->event);
+ free_percpu(perf->buffer);
+ kfree(perf);
+ event->priv = NULL;
+}
+
+void damon_perf_prepare_access_checks(struct damon_ctx *ctx,
+ struct damon_perf_event *event)
+{
+ struct damon_perf *perf = event->priv;
+ int cpu;
+
+ if (!perf)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct perf_event *perf_event = per_cpu(*perf->event, cpu);
+
+ if (perf_event)
+ perf_event_enable(perf_event);
+ }
+}
+
+#endif /* CONFIG_PERF_EVENTS */
+
static int __init damon_va_initcall(void)
{
struct damon_operations ops = {
--
2.43.0
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [RFC PATCH v3 2/4] mm/damon/core: add common code for perf event based access check
2026-04-23 0:42 ` [RFC PATCH v3 2/4] mm/damon/core: add common code for perf event based access check Akinobu Mita
@ 2026-04-23 1:58 ` sashiko-bot
0 siblings, 0 replies; 14+ messages in thread
From: sashiko-bot @ 2026-04-23 1:58 UTC (permalink / raw)
To: Akinobu Mita; +Cc: damon, linux-perf-users
Sashiko AI review found 3 potential issue(s):
- [High] Memory leak in `damon_perf_init()` when `perf_event_create_kernel_counter()` fails with `-ENODEV` for all CPUs.
- [Medium] Unnecessary `PERF_SAMPLE_WEIGHT_STRUCT` flag in perf event attributes arbitrarily restricts hardware compatibility.
- [Low] Linker error when `CONFIG_DAMON_VADDR=n` but `CONFIG_PERF_EVENTS=y`.
--
commit 97617fb5ae7981867a46207cd14a68c3cbb4813d
Author: Akinobu Mita <akinobu.mita@gmail.com>
mm/damon/core: add common code for perf event based access check
This commit creates the necessary infrastructure (structure definitions,
functions, sysfs interfaces) to support perf event based access checks
for both physical and virtual address space monitoring.
> diff --git a/mm/damon/core.c b/mm/damon/core.c
> index a52b2962aa22a..220b105482daa 100644
> --- a/mm/damon/core.c
> +++ b/mm/damon/core.c
[ ... ]
> @@ -2933,6 +2990,7 @@ static void kdamond_init_ctx(struct damon_ctx *ctx)
> unsigned long sample_interval = ctx->attrs.sample_interval ?
> ctx->attrs.sample_interval : 1;
> struct damos *scheme;
> + struct damon_perf_event *event, *next;
>
> ctx->passed_sample_intervals = 0;
> ctx->next_aggregation_sis = ctx->attrs.aggr_interval / sample_interval;
> @@ -2945,6 +3003,15 @@ static void kdamond_init_ctx(struct damon_ctx *ctx)
> damos_set_next_apply_sis(scheme, ctx);
> damos_set_filters_default_reject(scheme);
> }
> +
> + list_for_each_entry_safe(event, next, &ctx->perf_events, list) {
> + int err = damon_perf_init(ctx, event);
If the kernel is configured with CONFIG_DAMON_VADDR=n and
CONFIG_PERF_EVENTS=y, will this cause a linker error? The function
damon_perf_init() is defined in mm/damon/vaddr.c, which would not be
compiled in that configuration.
> +
> + if (err) {
> + list_del(&event->list);
> + kfree(event);
> + }
> + }
> }
>
> /*
> diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
> index b069dbc7e3d25..1534372e7637c 100644
> --- a/mm/damon/vaddr.c
> +++ b/mm/damon/vaddr.c
[ ... ]
> @@ -936,6 +939,161 @@ static int damon_va_scheme_score(struct damon_ctx *context,
> return DAMOS_MAX_SCORE;
> }
>
> +#ifdef CONFIG_PERF_EVENTS
[ ... ]
> +int damon_perf_init(struct damon_ctx *ctx, struct damon_perf_event *event)
> +{
> + struct damon_perf *perf;
> + struct perf_event_attr attr = {
> + .type = PERF_TYPE_RAW,
> + .size = sizeof(attr),
> + .type = event->attr.type,
> + .config = event->attr.config,
> + .config1 = event->attr.config1,
> + .config2 = event->attr.config2,
> + .sample_freq = event->attr.sample_freq,
> + .freq = 1,
> + .sample_type = PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR |
> + PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
> + (event->attr.sample_phys_addr ? PERF_SAMPLE_PHYS_ADDR : 0) |
> + PERF_SAMPLE_WEIGHT_STRUCT,
Does this configuration need to include PERF_SAMPLE_WEIGHT_STRUCT?
The overflow handler damon_perf_overflow() doesn't appear to read any
weight metrics from the sample data. If the underlying hardware PMU does
not support extended weight sampling, could this cause
perf_event_create_kernel_counter() to return -EINVAL and unnecessarily
reject the configuration?
> + .precise_ip = 2,
> + .pinned = 1,
> + .disabled = 1,
> + };
> + int cpu;
> + int err = -ENOMEM;
> + bool found = false;
> +
> + perf = kzalloc_obj(*perf);
> + if (!perf)
> + return -ENOMEM;
> +
> + perf->event = alloc_percpu(typeof(*perf->event));
> + if (!perf->event)
> + goto free_percpu;
> +
> + perf->buffer = alloc_percpu(typeof(*perf->buffer));
> + if (!perf->buffer)
> + goto free_percpu;
> +
> + for_each_possible_cpu(cpu) {
> + struct perf_event *perf_event;
> + struct damon_perf_buffer *buffer = per_cpu_ptr(perf->buffer, cpu);
> +
> + perf_event = perf_event_create_kernel_counter(&attr, cpu, NULL,
> + damon_perf_overflow, event);
> + if (IS_ERR(perf_event)) {
> + err = PTR_ERR(perf_event);
> + if (err == -ENODEV)
> + continue;
> + pr_err("perf event create on CPU %d failed with %d\n", cpu, err);
> + goto free_for_each_cpu;
> + }
> + found = true;
> + *per_cpu_ptr(perf->event, cpu) = perf_event;
> +
> + buffer->size = DAMON_PERF_INIT_RECORDS;
> + buffer->reports = kvcalloc_node(buffer->size, sizeof(buffer->reports[0]),
> + GFP_KERNEL, cpu_to_node(cpu));
> + if (!buffer->reports)
> + goto free_for_each_cpu;
> + }
> + event->priv = perf;
> +
> + return found ? 0 : -ENODEV;
If perf_event_create_kernel_counter() returns -ENODEV for all possible CPUs,
the found flag remains false, and this returns -ENODEV.
Since the caller will receive a non-zero error code and free the event
wrapper without calling damon_perf_cleanup(), does this leak the memory
allocated for perf, perf->event, and perf->buffer?
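If that is the concern, one untested way to avoid the leak would be to
fall through to the existing cleanup label when no counter was created,
e.g.:

	if (!found) {
		err = -ENODEV;
		goto free_for_each_cpu;
	}
	event->priv = perf;

	return 0;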
> +
> +free_for_each_cpu:
> + for_each_possible_cpu(cpu) {
> + struct perf_event *perf_event = per_cpu(*perf->event, cpu);
> + struct damon_perf_buffer *buffer = per_cpu_ptr(perf->buffer, cpu);
> +
> + if (perf_event)
> + perf_event_release_kernel(perf_event);
> + kvfree(buffer->reports);
> + }
> +free_percpu:
> + free_percpu(perf->event);
> + free_percpu(perf->buffer);
> + kfree(perf);
> +
> + return err;
> +}
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260423004211.7037-1-akinobu.mita@gmail.com?part=2
^ permalink raw reply [flat|nested] 14+ messages in thread
* [RFC PATCH v3 3/4] mm/damon/vaddr: support perf event based access check
2026-04-23 0:42 [RFC PATCH v3 0/4] mm/damon: introduce perf event based access check Akinobu Mita
2026-04-23 0:42 ` [RFC PATCH v3 1/4] mm/damon/core: add code borrowed from report-based monitoring work Akinobu Mita
2026-04-23 0:42 ` [RFC PATCH v3 2/4] mm/damon/core: add common code for perf event based access check Akinobu Mita
@ 2026-04-23 0:42 ` Akinobu Mita
2026-04-23 2:48 ` sashiko-bot
2026-04-23 0:42 ` [RFC PATCH v3 4/4] mm/damon/paddr: " Akinobu Mita
2026-04-23 4:34 ` [RFC PATCH v3 0/4] mm/damon: introduce " SeongJae Park
4 siblings, 1 reply; 14+ messages in thread
From: Akinobu Mita @ 2026-04-23 0:42 UTC (permalink / raw)
To: damon; +Cc: linux-perf-users, sj, akinobu.mita
This patch adds a perf event based access check for virtual address space
monitoring.
The perf event specified for the perf event based access check must allow
obtaining the data source address corresponding to each sample. In other
words, PERF_SAMPLE_DATA_SRC must be specifiable in
perf_event_attr.sample_type.
The perf event based access check is performed as follows:
1. During the sampling interval, samples of the perf event are stored in
a sampling buffer.
2. After the sampling interval ends, during the access check, a
histogram showing the number of accesses to each memory address is
generated based on each sample contained in the sampling buffer.
If the sampling buffer is full during a sampling interval, the sampling
buffer is expanded to prepare for the next sampling interval.
If the memory addresses corresponding to the perf event samples are
obtained as virtual addresses, the histogram is implemented as a
two-dimensional xarray indexed by pid and virtual memory address.
If the memory addresses corresponding to the perf event samples are
obtained as physical addresses, the histogram is implemented as an
xarray indexed by physical address.
3. For each monitoring target region, the number of accesses is updated
based on the histogram.
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
---
mm/damon/core.c | 42 ++++-
mm/damon/ops-common.h | 24 +++
mm/damon/vaddr.c | 348 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 412 insertions(+), 2 deletions(-)
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 220b105482da..def72672982a 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -3014,6 +3014,40 @@ static void kdamond_init_ctx(struct damon_ctx *ctx)
}
}
+static void kdamond_prepare_perf_event_report(struct damon_ctx *ctx)
+{
+ struct damon_perf_event *event;
+
+ list_for_each_entry(event, &ctx->perf_events, list)
+ damon_perf_prepare_access_checks(ctx, event);
+}
+
+static unsigned int kdamond_check_perf_event_reported_accesses(struct damon_ctx *ctx)
+{
+ struct damon_target *t;
+ struct damon_perf_event *event;
+ unsigned int max_nr_accesses = 0;
+
+ if (damon_target_has_pid(ctx)) {
+ list_for_each_entry(event, &ctx->perf_events, list)
+ damon_va_perf_check_accesses(ctx, event);
+ }
+
+ damon_for_each_target(t, ctx) {
+ struct damon_region *r;
+
+ damon_for_each_region(r, t) {
+ if (r->access_reported)
+ r->access_reported = false;
+ else
+ damon_update_region_access_rate(r, false, &ctx->attrs);
+ max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
+ }
+ }
+
+ return max_nr_accesses;
+}
+
/*
* The monitoring daemon that runs as a kernel thread
*/
@@ -3057,13 +3091,17 @@ static int kdamond_fn(void *data)
if (kdamond_wait_activation(ctx))
break;
- if (ctx->ops.prepare_access_checks)
+ if (!list_empty(&ctx->perf_events))
+ kdamond_prepare_perf_event_report(ctx);
+ else if (ctx->ops.prepare_access_checks)
ctx->ops.prepare_access_checks(ctx);
kdamond_usleep(sample_interval);
ctx->passed_sample_intervals++;
- if (ctx->ops.check_accesses)
+ if (!list_empty(&ctx->perf_events))
+ max_nr_accesses = kdamond_check_perf_event_reported_accesses(ctx);
+ else if (ctx->ops.check_accesses)
max_nr_accesses = ctx->ops.check_accesses(ctx);
if (time_after_eq(ctx->passed_sample_intervals,
diff --git a/mm/damon/ops-common.h b/mm/damon/ops-common.h
index e28c5afab7f0..da39abe07cfc 100644
--- a/mm/damon/ops-common.h
+++ b/mm/damon/ops-common.h
@@ -27,14 +27,33 @@ bool damos_ops_has_filter(struct damos *s);
#ifdef CONFIG_PERF_EVENTS
void damon_perf_prepare_access_checks(struct damon_ctx *ctx, struct damon_perf_event *event);
+void damon_va_perf_check_accesses(struct damon_ctx *ctx, struct damon_perf_event *event);
int damon_perf_init(struct damon_ctx *ctx, struct damon_perf_event *event);
void damon_perf_cleanup(struct damon_ctx *ctx, struct damon_perf_event *event);
+struct damon_vaddr_histogram {
+ struct xarray targets;
+};
+
+struct damon_paddr_histogram {
+ struct xarray accesses;
+};
+
struct damon_perf {
struct perf_event * __percpu *event;
struct damon_perf_buffer __percpu *buffer;
+ union {
+ struct damon_vaddr_histogram vaddr_histogram;
+ struct damon_paddr_histogram paddr_histogram;
+ };
};
+void damon_paddr_histogram_init(struct damon_paddr_histogram *histogram);
+unsigned long damon_paddr_histogram_count(struct damon_paddr_histogram *histogram, u64 paddr);
+void damon_paddr_histogram_destroy(struct damon_paddr_histogram *histogram);
+
+void damon_perf_populate_paddr_histogram(struct damon_ctx *ctx, struct damon_perf_event *event);
+
#else /* CONFIG_PERF_EVENTS */
static inline void damon_perf_prepare_access_checks(struct damon_ctx *ctx,
@@ -42,6 +61,11 @@ static inline void damon_perf_prepare_access_checks(struct damon_ctx *ctx,
{
}
+static inline void damon_va_perf_check_accesses(struct damon_ctx *ctx,
+ struct damon_perf_event *event)
+{
+}
+
static inline int damon_perf_init(struct damon_ctx *ctx, struct damon_perf_event *event)
{
return 0;
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 1534372e7637..1afbad9dfe64 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -17,6 +17,7 @@
#include <linux/circ_buf.h>
#include <linux/perf_event.h>
+#include <linux/xarray.h>
#include "../internal.h"
#include "ops-common.h"
@@ -948,6 +949,63 @@ struct damon_perf_buffer {
unsigned long size;
};
+struct damon_vaddr_histogram_per_target {
+ struct xarray accesses;
+};
+
+static void damon_vaddr_histogram_init(struct damon_vaddr_histogram *histogram)
+{
+ xa_init(&histogram->targets);
+}
+
+static void damon_vaddr_histogram_add(struct damon_vaddr_histogram *histogram, u32 pid,
+ u64 vaddr)
+{
+ struct damon_vaddr_histogram_per_target *target;
+ unsigned long nr_accesses;
+
+ while (!(target = xa_load(&histogram->targets, pid))) {
+ target = kmalloc_obj(*target);
+ if (!target)
+ return;
+
+ xa_init(&target->accesses);
+
+ if (xa_err(xa_store(&histogram->targets, pid, target, GFP_KERNEL))) {
+ pr_warn_once("Failed to store target histogram\n");
+ kfree(target);
+ return;
+ }
+ }
+
+ nr_accesses = xa_to_value(xa_load(&target->accesses, vaddr));
+ xa_store(&target->accesses, vaddr, xa_mk_value(nr_accesses + 1), GFP_KERNEL);
+}
+
+static unsigned long damon_vaddr_histogram_count(struct damon_vaddr_histogram *histogram,
+ u32 pid, u64 vaddr)
+{
+ struct damon_vaddr_histogram_per_target *target;
+ unsigned long nr_accesses = 0;
+
+ target = xa_load(&histogram->targets, pid);
+ if (target)
+ nr_accesses = xa_to_value(xa_load(&target->accesses, vaddr));
+
+ return nr_accesses;
+}
+
+static void damon_vaddr_histogram_destroy(struct damon_vaddr_histogram *histogram)
+{
+ unsigned long index;
+ struct damon_vaddr_histogram_per_target *target;
+
+ xa_for_each(&histogram->targets, index, target)
+ xa_destroy(&target->accesses);
+
+ xa_destroy(&histogram->targets);
+}
+
static void damon_perf_overflow(struct perf_event *perf_event, struct perf_sample_data *data,
struct pt_regs *regs)
{
@@ -1092,6 +1150,296 @@ void damon_perf_prepare_access_checks(struct damon_ctx *ctx,
}
}
+static void damon_va_perf_check_accesses_by_vaddr(struct damon_ctx *ctx,
+ struct damon_perf_event *event)
+{
+ struct damon_perf *perf = event->priv;
+ struct damon_target *t;
+ int cpu;
+ unsigned int tidx = 0;
+
+ if (!perf)
+ return;
+
+ damon_vaddr_histogram_init(&perf->vaddr_histogram);
+
+ for_each_possible_cpu(cpu) {
+ struct perf_event *perf_event = per_cpu(*perf->event, cpu);
+ struct damon_perf_buffer *buffer = per_cpu_ptr(perf->buffer, cpu);
+ unsigned long head, tail, count, i;
+
+ if (!perf_event)
+ continue;
+
+ perf_event_disable(perf_event);
+
+ head = smp_load_acquire(&buffer->head);
+ tail = buffer->tail;
+ count = CIRC_CNT(head, tail, buffer->size);
+
+ for (i = 0; i < count; i++) {
+ struct damon_access_report *report =
+ &buffer->reports[(tail + i) & (buffer->size - 1)];
+
+ damon_vaddr_histogram_add(&perf->vaddr_histogram, report->tgid,
+ report->vaddr & PAGE_MASK);
+ }
+ smp_store_release(&buffer->tail, (tail + count) & (buffer->size - 1));
+
+ if ((count == buffer->size - 1) && (buffer->size < DAMON_PERF_MAX_RECORDS)) {
+ void *new_reports = kvcalloc_node(buffer->size * 2,
+ sizeof(buffer->reports[0]), GFP_KERNEL,
+ cpu_to_node(cpu));
+
+ if (new_reports) {
+ kvfree(buffer->reports);
+ buffer->reports = new_reports;
+ buffer->head = 0;
+ buffer->tail = 0;
+ buffer->size *= 2;
+ }
+ }
+ }
+
+ damon_for_each_target(t, ctx) {
+ struct damon_region *r;
+ u32 pid = pid_nr(t->pid);
+
+ damon_for_each_region(r, t) {
+ unsigned long addr;
+
+ if (r->access_reported)
+ continue;
+
+ for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
+ if (damon_vaddr_histogram_count(&perf->vaddr_histogram, pid,
+ addr & PAGE_MASK)) {
+ damon_update_region_access_rate(r, true, &ctx->attrs);
+ r->access_reported = true;
+ break;
+ }
+ }
+ }
+ tidx++;
+ }
+
+ damon_vaddr_histogram_destroy(&perf->vaddr_histogram);
+}
+
+struct damon_paddr_walk {
+ struct damon_paddr_histogram *histogram;
+ bool accessed;
+};
+
+static void damon_paddr_histogram_add(struct damon_paddr_histogram *histogram, u64 paddr);
+static const struct mm_walk_ops damon_paddr_ops;
+
+void damon_perf_populate_paddr_histogram(struct damon_ctx *ctx, struct damon_perf_event *event)
+{
+ struct damon_perf *perf = event->priv;
+ int cpu;
+
+ if (!perf)
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct perf_event *perf_event = per_cpu(*perf->event, cpu);
+ struct damon_perf_buffer *buffer = per_cpu_ptr(perf->buffer, cpu);
+ unsigned long head, tail, count, i;
+
+ if (!perf_event)
+ continue;
+
+ perf_event_disable(perf_event);
+
+ head = smp_load_acquire(&buffer->head);
+ tail = buffer->tail;
+ count = CIRC_CNT(head, tail, buffer->size);
+
+ for (i = 0; i < count; i++) {
+ struct damon_access_report *report =
+ &buffer->reports[(tail + i) & (buffer->size - 1)];
+
+ damon_paddr_histogram_add(&perf->paddr_histogram,
+ report->paddr & PAGE_MASK);
+ }
+ smp_store_release(&buffer->tail, (tail + count) & (buffer->size - 1));
+
+ if ((count == buffer->size - 1) && (buffer->size < DAMON_PERF_MAX_RECORDS)) {
+ void *new_reports = kvcalloc_node(buffer->size * 2,
+ sizeof(buffer->reports[0]), GFP_KERNEL,
+ cpu_to_node(cpu));
+
+ if (new_reports) {
+ kvfree(buffer->reports);
+ buffer->reports = new_reports;
+ buffer->head = 0;
+ buffer->tail = 0;
+ buffer->size *= 2;
+ }
+ }
+ }
+}
+
+static void damon_va_perf_check_accesses_by_paddr(struct damon_ctx *ctx,
+ struct damon_perf_event *event)
+{
+ struct damon_perf *perf = event->priv;
+ struct damon_target *t;
+ unsigned int tidx = 0;
+
+ if (!perf)
+ return;
+
+ damon_paddr_histogram_init(&perf->paddr_histogram);
+
+ damon_perf_populate_paddr_histogram(ctx, event);
+
+ damon_for_each_target(t, ctx) {
+ struct damon_region *r;
+ struct mm_struct *mm = damon_get_mm(t);
+
+ if (!mm)
+ continue;
+
+ mmap_read_lock(mm);
+ damon_for_each_region(r, t) {
+ struct damon_paddr_walk walk_private = {
+ .histogram = &perf->paddr_histogram,
+ .accessed = false,
+ };
+
+ if (r->access_reported)
+ continue;
+
+ walk_page_range(mm, r->ar.start, r->ar.end, &damon_paddr_ops,
+ &walk_private);
+ if (walk_private.accessed) {
+ damon_update_region_access_rate(r, true, &ctx->attrs);
+ r->access_reported = true;
+ }
+ }
+ mmap_read_unlock(mm);
+ mmput(mm);
+ tidx++;
+ }
+
+ damon_paddr_histogram_destroy(&perf->paddr_histogram);
+}
+
+void damon_va_perf_check_accesses(struct damon_ctx *ctx, struct damon_perf_event *event)
+{
+ if (event->attr.sample_phys_addr)
+ return damon_va_perf_check_accesses_by_paddr(ctx, event);
+ else
+ return damon_va_perf_check_accesses_by_vaddr(ctx, event);
+}
+
+void damon_paddr_histogram_init(struct damon_paddr_histogram *histogram)
+{
+ xa_init(&histogram->accesses);
+}
+
+static void damon_paddr_histogram_add(struct damon_paddr_histogram *histogram,
+ u64 paddr)
+{
+ unsigned long nr_accesses;
+
+ nr_accesses = xa_to_value(xa_load(&histogram->accesses, paddr));
+ xa_store(&histogram->accesses, paddr, xa_mk_value(nr_accesses + 1), GFP_KERNEL);
+}
+
+unsigned long damon_paddr_histogram_count(struct damon_paddr_histogram *histogram, u64 paddr)
+{
+ return xa_to_value(xa_load(&histogram->accesses, paddr));
+}
+
+void damon_paddr_histogram_destroy(struct damon_paddr_histogram *histogram)
+{
+ xa_destroy(&histogram->accesses);
+}
+
+static int damon_paddr_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t *pte;
+ spinlock_t *ptl;
+ struct damon_paddr_walk *paddr_walk = walk->private;
+
+ ptl = pmd_trans_huge_lock(pmd, walk->vma);
+ if (ptl) {
+ pmd_t pmde = pmdp_get(pmd);
+
+ if (pmd_present(pmde)) {
+ for (; addr < next && !paddr_walk->accessed; addr += PAGE_SIZE) {
+ u64 frame = pmd_pfn(pmde) +
+ ((addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT);
+
+ if (damon_paddr_histogram_count(paddr_walk->histogram,
+ PFN_PHYS(frame))) {
+ paddr_walk->accessed = true;
+ break;
+ }
+ }
+ }
+ spin_unlock(ptl);
+ return paddr_walk->accessed ? 1 : 0;
+ }
+
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!pte) {
+ walk->action = ACTION_AGAIN;
+ return 0;
+ }
+
+ for (; addr < next && !paddr_walk->accessed; pte++, addr += PAGE_SIZE) {
+ pte_t ptent = ptep_get(pte);
+
+ if (pte_present(ptent)) {
+ if (damon_paddr_histogram_count(paddr_walk->histogram,
+ PFN_PHYS(pte_pfn(ptent)))) {
+ paddr_walk->accessed = true;
+ }
+ }
+ }
+
+ pte_unmap_unlock(pte - 1, ptl);
+
+ return paddr_walk->accessed ? 1 : 0;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+static int damon_paddr_hugetlb_entry(pte_t *pte, unsigned long hmask,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct damon_paddr_walk *paddr_walk = walk->private;
+ pte_t entry = huge_ptep_get(walk->mm, addr, pte);
+
+ if (pte_present(entry)) {
+ for (; addr < end; addr += PAGE_SIZE) {
+ u64 frame = pte_pfn(entry) + ((addr & ~hmask) >> PAGE_SHIFT);
+
+ if (damon_paddr_histogram_count(paddr_walk->histogram,
+ PFN_PHYS(frame))) {
+ paddr_walk->accessed = true;
+ break;
+ }
+ }
+ }
+
+ return paddr_walk->accessed ? 1 : 0;
+}
+#else
+#define damon_perf_hugetlb_entry NULL
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static const struct mm_walk_ops damon_paddr_ops = {
+ .pmd_entry = damon_paddr_pmd_entry,
+ .hugetlb_entry = damon_paddr_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
#endif /* CONFIG_PERF_EVENTS */
static int __init damon_va_initcall(void)
--
2.43.0
* Re: [RFC PATCH v3 3/4] mm/damon/vaddr: support perf event based access check
2026-04-23 0:42 ` [RFC PATCH v3 3/4] mm/damon/vaddr: support " Akinobu Mita
@ 2026-04-23 2:48 ` sashiko-bot
0 siblings, 0 replies; 14+ messages in thread
From: sashiko-bot @ 2026-04-23 2:48 UTC (permalink / raw)
To: Akinobu Mita; +Cc: damon, linux-perf-users
Sashiko AI review found 13 potential issue(s):
- [Critical] Compilation Failure: Use of undefined API `kmalloc_obj()`.
- [Critical] Compilation Failure: Non-existent field `sample_phys_addr` in `perf_event_attr`.
- [Critical] Compilation Failure: Typo in the `CONFIG_HUGETLB_PAGE` fallback macro.
- [High] Deadlock Risk: Recursive acquisition of `mmap_read_lock`.
- [High] Infinite Loop: Returning `ACTION_AGAIN` on `pte_offset_map_lock()` failure.
- [High] Memory Leak: `damon_vaddr_histogram_per_target` structs are never freed.
- [High] Memory Leak: Dynamically allocated `buffer->reports` arrays are orphaned.
- [High] Memory Leak: Orphaned `perf` structs when the `kdamond` thread is restarted.
- [High] Data Truncation: 64-bit physical addresses are silently truncated when used as xarray indices on 32-bit platforms.
- [High] Logic Error: Unbounded perf events for physical memory targets (`DAMON_OPS_PADDR`).
- [High] Logic Error: Invalid PFN extraction from non-present PMD migration entries.
- [High] Severe Performance Flaw: Full page table walks per sampling interval.
- [High] Severe Performance Flaw: O(N) iteration by `PAGE_SIZE` over massive virtual memory regions.
--
commit 04697c66c594dde51627f9cdaf4c7d11b08fe1fe
Author: Akinobu Mita <akinobu.mita@gmail.com>
mm/damon/vaddr: support perf event based access check
This commit introduces a mechanism to use perf events for virtual and
physical address space monitoring in DAMON. It buffers perf samples during
the sampling interval and generates a histogram of memory accesses. During
the access check phase, this histogram is used to update the access counts
for each monitoring target region.
> diff --git a/mm/damon/core.c b/mm/damon/core.c
> --- a/mm/damon/core.c
> +++ b/mm/damon/core.c
> @@ -3014,6 +3014,40 @@ static void kdamond_init_ctx(struct damon_ctx *ctx)
> }
> }
When the kdamond thread begins, kdamond_init_ctx() unconditionally initializes
the perf events. If a user stops and restarts the thread via sysfs, does this
leak the previous allocations? Calling damon_perf_init() again would allocate
new perf structures, hardware counters, and per-CPU buffers, permanently
leaking the previous ones.
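If so, one minimal guard (a sketch, placed at the top of damon_perf_init())
could make the initialization idempotent:

        /* Hypothetical guard: skip re-initialization on kdamond restart. */
        if (event->priv)
                return 0;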
> +static void kdamond_prepare_perf_event_report(struct damon_ctx *ctx)
> +{
> + struct damon_perf_event *event;
> +
> + list_for_each_entry(event, &ctx->perf_events, list)
> + damon_perf_prepare_access_checks(ctx, event);
> +}
> +
> +static unsigned int kdamond_check_perf_event_reported_accesses(struct damon_ctx *ctx)
> +{
> + struct damon_target *t;
> + struct damon_perf_event *event;
> + unsigned int max_nr_accesses = 0;
> +
> + if (damon_target_has_pid(ctx)) {
> + list_for_each_entry(event, &ctx->perf_events, list)
> + damon_va_perf_check_accesses(ctx, event);
> + }
For physical memory targets (which have no PIDs), the processing code inside
this if block is skipped. However, kdamond_prepare_perf_event_report()
unconditionally enables perf events. Does this leave perf events permanently
enabled and continuously firing for physical memory targets?
[ ... ]
> diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
> --- a/mm/damon/vaddr.c
> +++ b/mm/damon/vaddr.c
[ ... ]
> +static void damon_vaddr_histogram_add(struct damon_vaddr_histogram *histogram, u32 pid,
> + u64 vaddr)
> +{
> + struct damon_vaddr_histogram_per_target *target;
> + unsigned long nr_accesses;
> +
> + while (!(target = xa_load(&histogram->targets, pid))) {
> + target = kmalloc_obj(*target);
Can this code compile? There is no kmalloc_obj() macro or function in the
Linux kernel. A standard kmalloc() could be used instead:
target = kmalloc(sizeof(*target), GFP_KERNEL);
[ ... ]
> +static void damon_vaddr_histogram_destroy(struct damon_vaddr_histogram *histogram)
> +{
> + unsigned long index;
> + struct damon_vaddr_histogram_per_target *target;
> +
> + xa_for_each(&histogram->targets, index, target)
> + xa_destroy(&target->accesses);
Does this code leak the damon_vaddr_histogram_per_target structure? The nested
accesses xarray is destroyed via xa_destroy(), but the target struct itself is
not freed with kfree().
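For example, a sketch of one possible fix:

        xa_for_each(&histogram->targets, index, target) {
                xa_destroy(&target->accesses);
                kfree(target);
        }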
> + xa_destroy(&histogram->targets);
> +}
[ ... ]
> +static void damon_va_perf_check_accesses_by_vaddr(struct damon_ctx *ctx,
> + struct damon_perf_event *event)
> +{
[ ... ]
> + if ((count == buffer->size - 1) && (buffer->size < DAMON_PERF_MAX_RECORDS)) {
> + void *new_reports = kvcalloc_node(buffer->size * 2,
> + sizeof(buffer->reports[0]), GFP_KERNEL,
> + cpu_to_node(cpu));
Are these dynamically allocated buffer->reports arrays freed during cleanup?
When perf monitoring stops, damon_perf_cleanup() frees the top-level per-CPU
pointer wrapper but appears to orphan these inner reports arrays.
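If so, a per-CPU pass like the following (a sketch, assuming it runs in
damon_perf_cleanup() before free_percpu()) might be needed:

        for_each_possible_cpu(cpu) {
                struct damon_perf_buffer *buffer = per_cpu_ptr(perf->buffer, cpu);

                kvfree(buffer->reports);
        }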
[ ... ]
> + damon_for_each_target(t, ctx) {
> + struct damon_region *r;
> + u32 pid = pid_nr(t->pid);
> +
> + damon_for_each_region(r, t) {
> + unsigned long addr;
> +
> + if (r->access_reported)
> + continue;
> +
> + for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
Can this loop stall the kernel thread? Since DAMON regions can merge to
encompass hundreds of gigabytes, iterating from r->ar.start to r->ar.end by
PAGE_SIZE could result in hundreds of millions of iterations per interval.
Could this logic iterate over the populated xarray entries instead?
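A sketch of that idea, using xa_find() to ask whether any sampled address in
the per-target xarray falls inside the region (the target_hist lookup is
hypothetical, shown only for illustration):

        struct damon_vaddr_histogram_per_target *target_hist;
        unsigned long index = r->ar.start;

        target_hist = xa_load(&perf->vaddr_histogram.targets, pid);
        if (target_hist &&
            xa_find(&target_hist->accesses, &index, r->ar.end - 1, XA_PRESENT)) {
                damon_update_region_access_rate(r, true, &ctx->attrs);
                r->access_reported = true;
        }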
> + if (damon_vaddr_histogram_count(&perf->vaddr_histogram, pid,
> + addr & PAGE_MASK)) {
> + damon_update_region_access_rate(r, true, &ctx->attrs);
> + r->access_reported = true;
> + break;
> + }
> + }
> + }
> + tidx++;
> + }
[ ... ]
> +static void damon_va_perf_check_accesses_by_paddr(struct damon_ctx *ctx,
> + struct damon_perf_event *event)
> +{
[ ... ]
> + damon_for_each_target(t, ctx) {
> + struct damon_region *r;
> + struct mm_struct *mm = damon_get_mm(t);
> +
> + if (!mm)
> + continue;
> +
> + mmap_read_lock(mm);
> + damon_for_each_region(r, t) {
> + struct damon_paddr_walk walk_private = {
> + .histogram = &perf->paddr_histogram,
> + .accessed = false,
> + };
> +
> + if (r->access_reported)
> + continue;
> +
> + walk_page_range(mm, r->ar.start, r->ar.end, &damon_paddr_ops,
> + &walk_private);
Can this sequence cause a recursive read lock deadlock? The mmap_read_lock is
acquired directly, but damon_paddr_ops is configured with
.walk_lock = PGWALK_RDLOCK, which instructs the page walker to also acquire
the mmap_read_lock. If a writer is pending, the second read lock will block
and self-deadlock.
Is it too expensive to call walk_page_range() across the target's entire
mapped virtual memory space on every sampling interval? For large memory
footprints, walking millions of page table entries every few milliseconds
could create severe CPU overhead and defeat the adaptive sampling design.
> + if (walk_private.accessed) {
> + damon_update_region_access_rate(r, true, &ctx->attrs);
> + r->access_reported = true;
> + }
> + }
> + mmap_read_unlock(mm);
> + mmput(mm);
> + tidx++;
> + }
> +
> + damon_paddr_histogram_destroy(&perf->paddr_histogram);
> +}
> +
> +void damon_va_perf_check_accesses(struct damon_ctx *ctx, struct damon_perf_event *event)
> +{
> + if (event->attr.sample_phys_addr)
Does struct perf_event_attr contain a sample_phys_addr field? Physical
address sampling is typically requested by setting the PERF_SAMPLE_PHYS_ADDR
flag in the sample_type bitmask.
> + return damon_va_perf_check_accesses_by_paddr(ctx, event);
> + else
> + return damon_va_perf_check_accesses_by_vaddr(ctx, event);
> +}
[ ... ]
> +static void damon_paddr_histogram_add(struct damon_paddr_histogram *histogram,
> + u64 paddr)
> +{
> + unsigned long nr_accesses;
> +
> + nr_accesses = xa_to_value(xa_load(&histogram->accesses, paddr));
> + xa_store(&histogram->accesses, paddr, xa_mk_value(nr_accesses + 1), GFP_KERNEL);
Can this silently truncate physical addresses on 32-bit platforms? The paddr
argument is a u64, but it is passed directly as the index to xa_load() and
xa_store(), which take an unsigned long. On 32-bit systems with PAE/LPAE,
this will drop the upper 32 bits. Could the physical address be shifted by
PAGE_SHIFT to store the PFN instead?
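For instance, a sketch that keys the xarray by page frame number:

        unsigned long pfn = PHYS_PFN(paddr);

        nr_accesses = xa_to_value(xa_load(&histogram->accesses, pfn));
        xa_store(&histogram->accesses, pfn, xa_mk_value(nr_accesses + 1), GFP_KERNEL);

The lookup side would of course need the same conversion.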
> +}
[ ... ]
> +static int damon_paddr_pmd_entry(pmd_t *pmd, unsigned long addr,
> + unsigned long next, struct mm_walk *walk)
> +{
> + pte_t *pte;
> + spinlock_t *ptl;
> + struct damon_paddr_walk *paddr_walk = walk->private;
> +
> + ptl = pmd_trans_huge_lock(pmd, walk->vma);
> + if (ptl) {
> + pmd_t pmde = pmdp_get(pmd);
> +
> + if (pmd_present(pmde)) {
> + for (; addr < next && !paddr_walk->accessed; addr += PAGE_SIZE) {
> + u64 frame = pmd_pfn(pmde) +
Can pmd_present() evaluate to true for non-present leaf entries like migration
entries? Extracting the PFN via pmd_pfn() on a migration entry would treat
swap offset metadata as a physical address, resulting in invalid calculations.
Does this code need to verify the PMD is a valid THP first?
> + ((addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT);
> +
> + if (damon_paddr_histogram_count(paddr_walk->histogram,
> + PFN_PHYS(frame))) {
> + paddr_walk->accessed = true;
> + break;
> + }
> + }
> + }
> + spin_unlock(ptl);
> + return paddr_walk->accessed ? 1 : 0;
> + }
> +
> + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
> + if (!pte) {
> + walk->action = ACTION_AGAIN;
> + return 0;
Will setting walk->action = ACTION_AGAIN on a pte_offset_map_lock() failure
cause an infinite loop? Migration entries cause this lock to return NULL
non-transiently, meaning the page walker will continually retry the exact same
PMD without making progress. Returning 0 without ACTION_AGAIN skips it
gracefully.
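That is, something like this sketch:

        pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
        if (!pte)
                return 0;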
> + }
[ ... ]
> +#ifdef CONFIG_HUGETLB_PAGE
> +static int damon_paddr_hugetlb_entry(pte_t *pte, unsigned long hmask,
[ ... ]
> + return paddr_walk->accessed ? 1 : 0;
> +}
> +#else
> +#define damon_perf_hugetlb_entry NULL
> +#endif /* CONFIG_HUGETLB_PAGE */
> +
> +static const struct mm_walk_ops damon_paddr_ops = {
> + .pmd_entry = damon_paddr_pmd_entry,
> + .hugetlb_entry = damon_paddr_hugetlb_entry,
Does this cause a compilation failure when CONFIG_HUGETLB_PAGE is disabled?
The fallback macro is defined as damon_perf_hugetlb_entry, but the struct
initializes .hugetlb_entry with damon_paddr_hugetlb_entry, leaving it
undeclared.
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260423004211.7037-1-akinobu.mita@gmail.com?part=3
* [RFC PATCH v3 4/4] mm/damon/paddr: support perf event based access check
2026-04-23 0:42 [RFC PATCH v3 0/4] mm/damon: introduce perf event based access check Akinobu Mita
` (2 preceding siblings ...)
2026-04-23 0:42 ` [RFC PATCH v3 3/4] mm/damon/vaddr: support " Akinobu Mita
@ 2026-04-23 0:42 ` Akinobu Mita
2026-04-23 3:22 ` sashiko-bot
2026-04-23 4:34 ` [RFC PATCH v3 0/4] mm/damon: introduce " SeongJae Park
4 siblings, 1 reply; 14+ messages in thread
From: Akinobu Mita @ 2026-04-23 0:42 UTC (permalink / raw)
To: damon; +Cc: linux-perf-users, sj, akinobu.mita
This patch adds a perf event based access check for physical address
space monitoring.
The implementation is very similar to the perf event based access check
for virtual address space monitoring.
However, for perf events that can be specified with physical address
spaces monitoring, the data source address corresponding to the sample
must be obtainable as a physical address. In other words,
PERF_SAMPLE_DATA_SRC and PERF_SAMPLE_PHYS_ADDR must be specifiable in
perf_event_attr.sample_type.
Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
---
mm/damon/core.c | 3 +++
mm/damon/ops-common.h | 6 ++++++
mm/damon/paddr.c | 41 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 50 insertions(+)
diff --git a/mm/damon/core.c b/mm/damon/core.c
index def72672982a..bf9318c13d0b 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -3031,6 +3031,9 @@ static unsigned int kdamond_check_perf_event_reported_accesses(struct damon_ctx
if (damon_target_has_pid(ctx)) {
list_for_each_entry(event, &ctx->perf_events, list)
damon_va_perf_check_accesses(ctx, event);
+ } else {
+ list_for_each_entry(event, &ctx->perf_events, list)
+ damon_pa_perf_check_accesses(ctx, event);
}
damon_for_each_target(t, ctx) {
diff --git a/mm/damon/ops-common.h b/mm/damon/ops-common.h
index da39abe07cfc..f6db6bc4d882 100644
--- a/mm/damon/ops-common.h
+++ b/mm/damon/ops-common.h
@@ -28,6 +28,7 @@ bool damos_ops_has_filter(struct damos *s);
void damon_perf_prepare_access_checks(struct damon_ctx *ctx, struct damon_perf_event *event);
void damon_va_perf_check_accesses(struct damon_ctx *ctx, struct damon_perf_event *event);
+void damon_pa_perf_check_accesses(struct damon_ctx *ctx, struct damon_perf_event *event);
int damon_perf_init(struct damon_ctx *ctx, struct damon_perf_event *event);
void damon_perf_cleanup(struct damon_ctx *ctx, struct damon_perf_event *event);
@@ -66,6 +67,11 @@ static inline void damon_va_perf_check_accesses(struct damon_ctx *ctx,
{
}
+static inline void damon_pa_perf_check_accesses(struct damon_ctx *ctx,
+ struct damon_perf_event *event)
+{
+}
+
static inline int damon_perf_init(struct damon_ctx *ctx, struct damon_perf_event *event)
{
return 0;
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 5cdcc5037cbc..97df21281fb1 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -363,6 +363,47 @@ static int damon_pa_scheme_score(struct damon_ctx *context,
return DAMOS_MAX_SCORE;
}
+#ifdef CONFIG_PERF_EVENTS
+
+void damon_pa_perf_check_accesses(struct damon_ctx *ctx, struct damon_perf_event *event)
+{
+ struct damon_perf *perf = event->priv;
+ struct damon_target *t;
+ unsigned int tidx = 0;
+
+ if (!perf)
+ return;
+
+ damon_paddr_histogram_init(&perf->paddr_histogram);
+
+ damon_perf_populate_paddr_histogram(ctx, event);
+
+ damon_for_each_target(t, ctx) {
+ struct damon_region *r;
+
+ damon_for_each_region(r, t) {
+ unsigned long addr;
+
+ if (r->access_reported)
+ continue;
+
+ for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
+ if (damon_paddr_histogram_count(&perf->paddr_histogram,
+ addr & PAGE_MASK)) {
+ damon_update_region_access_rate(r, true, &ctx->attrs);
+ r->access_reported = true;
+ break;
+ }
+ }
+ }
+ tidx++;
+ }
+
+ damon_paddr_histogram_destroy(&perf->paddr_histogram);
+}
+
+#endif /* CONFIG_PERF_EVENTS */
+
static int __init damon_pa_initcall(void)
{
struct damon_operations ops = {
--
2.43.0
* Re: [RFC PATCH v3 4/4] mm/damon/paddr: support perf event based access check
2026-04-23 0:42 ` [RFC PATCH v3 4/4] mm/damon/paddr: " Akinobu Mita
@ 2026-04-23 3:22 ` sashiko-bot
0 siblings, 0 replies; 14+ messages in thread
From: sashiko-bot @ 2026-04-23 3:22 UTC (permalink / raw)
To: Akinobu Mita; +Cc: damon, linux-perf-users
Sashiko AI review found 6 potential issue(s):
- [High] Severe Performance Regression and Soft Lockups via O(N) Memory Iteration
- [High] Address Scale Mismatch Breaking Access Reporting (`ctx->addr_unit` ignored)
- [High] Infinite Loop and Kernel Hang via Integer Overflow on 32-bit
- [High] XArray Index Truncation Corrupting PAE Physical Addresses
- [Medium] Silent Failure due to Missing `PERF_SAMPLE_PHYS_ADDR` Validation
- [Low] Unused Local Variable `tidx`
--
commit 0cc4cc53289c7273fbdf7b96674ce9315f78cdfb
Author: Akinobu Mita <akinobu.mita@gmail.com>
mm/damon/paddr: support perf event based access check
This commit adds perf event based access checks for physical address space
monitoring. It implements support similar to the virtual address space
monitoring, relying on PERF_SAMPLE_PHYS_ADDR to obtain the data source
address.
> mm/damon/paddr: support perf event based access check
[ ... ]
> However, for perf events that can be specified with physical address
> spaces monitoring, the data source address corresponding to the sample
> must be obtainable as a physical address. In other words,
> PERF_SAMPLE_DATA_SRC and PERF_SAMPLE_PHYS_ADDR must be specifiable in
> perf_event_attr.sample_type.
Is there validation in the code to enforce this requirement? If a user or
the framework fails to set the sample_phys_addr attribute, the perf
subsystem might fail to populate the physical address and default to 0.
Could this silently process invalid samples and record all accesses at
physical address 0?
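One possible guard (a sketch; where exactly it would live, e.g. in
damon_perf_init(), is hypothetical) would check the composed attribute:

        if (!damon_target_has_pid(ctx) &&
            !(attr.sample_type & PERF_SAMPLE_PHYS_ADDR))
                return -EINVAL;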
[ ... ]
> diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
> index 5cdcc5037cbc1..97df21281fb13 100644
> --- a/mm/damon/paddr.c
> +++ b/mm/damon/paddr.c
[ ... ]
> @@ -363,6 +363,47 @@ static int damon_pa_scheme_score(struct damon_ctx *context,
> return DAMOS_MAX_SCORE;
> }
>
> +#ifdef CONFIG_PERF_EVENTS
> +
> +void damon_pa_perf_check_accesses(struct damon_ctx *ctx, struct damon_perf_event *event)
> +{
> + struct damon_perf *perf = event->priv;
> + struct damon_target *t;
> + unsigned int tidx = 0;
Is tidx needed here? It is incremented at the end of the loop but never
read, which might trigger an unused-but-set-variable compiler warning.
> +
> + if (!perf)
> + return;
> +
> + damon_paddr_histogram_init(&perf->paddr_histogram);
> +
> + damon_perf_populate_paddr_histogram(ctx, event);
> +
> + damon_for_each_target(t, ctx) {
> + struct damon_region *r;
> +
> + damon_for_each_region(r, t) {
> + unsigned long addr;
> +
> + if (r->access_reported)
> + continue;
> +
> + for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
Does this directly use the core address as a physical byte address?
DAMON scales physical addresses down using ctx->addr_unit, meaning
r->ar.start and r->ar.end hold these scaled core addresses. If
ctx->addr_unit is greater than 1 (e.g., PAGE_SIZE), r->ar.start is actually
a Page Frame Number. Incrementing a PFN by PAGE_SIZE would skip large
blocks of memory per iteration. Should the code convert core addresses to
physical addresses using damon_pa_phys_addr() first?
Could this increment overflow on 32-bit architectures? If a physical memory
region ends near ULONG_MAX, addr += PAGE_SIZE might wrap around to 0. This
would keep the addr < r->ar.end condition true indefinitely, causing an
unkillable infinite loop.
Additionally, does this loop iterate over every single page in the region?
Since DAMON regions can be extremely large, iterating over millions of
pages and performing a synchronous XArray lookup for each one without
yielding via cond_resched() could heavily saturate the CPU and stall RCU
grace periods.
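A sketch addressing only the overflow and iteration concerns (the addr_unit
question is left aside): counting pages avoids the wraparound, and an
occasional cond_resched() keeps the loop preemptible:

        unsigned long nr_pages = (r->ar.end - r->ar.start) >> PAGE_SHIFT;
        unsigned long i;

        for (i = 0; i < nr_pages; i++) {
                unsigned long addr = r->ar.start + (i << PAGE_SHIFT);

                if (damon_paddr_histogram_count(&perf->paddr_histogram,
                                                addr & PAGE_MASK)) {
                        damon_update_region_access_rate(r, true, &ctx->attrs);
                        r->access_reported = true;
                        break;
                }
                if (!(i % 1024))
                        cond_resched();
        }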
> + if (damon_paddr_histogram_count(&perf->paddr_histogram,
> + addr & PAGE_MASK)) {
Does the underlying XArray implementation safely handle 64-bit physical
addresses on 32-bit systems?
The perf subsystem records the access address as a 64-bit u64 paddr, but
the XArray API uses unsigned long for its indices. On systems using PAE to
access more than 4GB of RAM, passing a 64-bit raw byte address to the
XArray could truncate the upper 32 bits, causing physical addresses above
4GB to alias with lower addresses. Would using Page Frame Numbers as XArray
indices instead of raw byte addresses avoid this?
> + damon_update_region_access_rate(r, true, &ctx->attrs);
> + r->access_reported = true;
> + break;
> + }
> + }
> + }
> + tidx++;
> + }
> +
> + damon_paddr_histogram_destroy(&perf->paddr_histogram);
> +}
> +
> +#endif /* CONFIG_PERF_EVENTS */
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260423004211.7037-1-akinobu.mita@gmail.com?part=4
* Re: [RFC PATCH v3 0/4] mm/damon: introduce perf event based access check
2026-04-23 0:42 [RFC PATCH v3 0/4] mm/damon: introduce perf event based access check Akinobu Mita
` (3 preceding siblings ...)
2026-04-23 0:42 ` [RFC PATCH v3 4/4] mm/damon/paddr: " Akinobu Mita
@ 2026-04-23 4:34 ` SeongJae Park
2026-04-24 3:27 ` Akinobu Mita
4 siblings, 1 reply; 14+ messages in thread
From: SeongJae Park @ 2026-04-23 4:34 UTC (permalink / raw)
To: Akinobu Mita; +Cc: SeongJae Park, damon, linux-perf-users
Hello Akinobu,
On Thu, 23 Apr 2026 09:42:06 +0900 Akinobu Mita <akinobu.mita@gmail.com> wrote:
> DAMON currently only provides PTE accessed-bit based access check, this
> patch series adds a new perf event based access check.
>
> Since perf event-based access checks do not require modifying the PTE
> accessed-bit for pages representing each damon region, it reduces the
> overhead of monitoring at a fixed granularity of the page size.
As I also commented to the previous version, the high level idea makes sense to
me. I think this can be useful.
Also as I commented to the previous version, I understand the page level
monitoring overhead reduction is the main purpose of this patch, and it makes
sense to me. Nonetheless, I like this mainly because the perf event could
provide more detailed information including from which CPU/thread the access is
made, and whether the access is for read/write, to my understanding. Please
let me know if my understanding is wrong.
>
> Here is a method and its results for comparing access checks using
> existing access bits and new perf events.
[...]
> Using accessed bit, prepare_access_checks takes 7 seconds and
> check_accesses takes 5 seconds.
[...]
> Using perf event, prepare_access_checks takes 0.01 seconds and
> check_accesses takes 2.6 seconds.
Thank you so much for sharing these great test results with the detailed setup
description!
> ```
> $ sudo $HOME/damo/damo stop
> $ sudo $HOME/damo/damo start --ops vaddr \
> --perf_event 5000 0 0x4 0x1cd 0x1f 0 \
> --monitoring_intervals 5s 60s 300s \
> --monitoring_nr_regions_range 1000000000 1000000000 \
> --target_pid $(cat /tmp/memcached.pid)
[...]
> Note: damo's --perf_event option
>
> Using these features also requires modifications to damo, but these
> are not included in this patch series and are currently under
> development in the following branch:
>
> * https://github.com/mita/damo/tree/damo-perf-for-v3.2.2
Thank you for sharing this, too!
>
> The option newly added to damo for perf event-based access check has the
> following format:
>
> `--perf_event <sample freq> <sample phys addr> <type> <config> <config1> <config2>`
I think we may need to discuss more for final interface. But I think that
could be done later, after the kernel ABI is fixed.
>
> - `sample freq`: A higher frequency improves access accuracy, but also
> increases overhead.
> - `sample phys addr`: specify 0 for vaddr and 1 for paddr.
> - The remaining type, config, config1, and config2 settings can be found
> as follows:
The values look a bit human-unfriendly. I wonder if we could use more
human-friendly values, e.g., 'cpu' for type,
'mem_trans_retired.load_latency_gt_1024' for config, etc. Not necessarily we
need to fix this right now. Let's keep discussing.
>
> Run `perf mem record -vv` to obtain the log, and then find the part in
> the log where `perf_event_open` was executed successfully.
> ```
> $ sudo perf mem record -vv echo > /tmp/perf-mem-record.log 2>&1
> $ cat /tmp/perf-mem-record.log
> ...
> perf_event_attr:
> type 4 (cpu)
> size 144
> config 0x1cd (mem_trans_retired.load_latency_gt_1024)
[...]
> ...
> ```
>
> The values for `type` and `config` are taken from this output.
> Use the values of `config1` and `config2` if they are included in the log;
> otherwise, set them to 0.
The above information will also be very useful for early testers of this patch
series including myself. Thank you for sharing.
>
> Any feedback or advice on the patch set would be greatly appreciated.
Again, I like the high level concept of this patch series. I definitely want
this to be merged. Nonetheless, not exactly as it is right now. I think we
should spend enough time for the user interface design. I hope the final
interface is long-term maintainable and easy to extend.
>
> * RFC v3
> - drop "reintroduce damon_operations->cleanup()" as no longer needed
> - drop "allow user to set min size of region" (will be sent separately)
> - add tgid to struct damon_access_report and use it instead of tid
> - set perf_event_attr.precise_ip to 2
> - prepare for the transition to report-based monitoring
> - switch to use access_reported in struct damon_region
> - switch to use sample/primitives/perf_events sysfs directory
Thank you for doing this revisioning.
For the user interface and also internal interface, I suggested to consider
reusing those for page fault events based per-CPUs/threads/reads/writes
monitoring DAMON extension patch series in our previous discussion. I think
this version is following the right direction much more than the previous
version.
But, nowadays I'm thinking about a new user interface for the
per-CPUs/threads/reads/writes monitoring that can easily also extended for perf
event based monitoring and for not only access monitoring but general and
multiple data attributes monitoring.
It is still under the development and I'm planning to share a very early RFC
version of that by LSFMMBPF. I will try to add my idea about how this perf
event based monitoring could also implemented by reusing the interface on the
early RFC version.
Could we wait until the early RFC version is published, review my idea about
how this perf event based monitoring could reuse it, and continue discussions
about the next steps of this patch series?
[1] https://lore.kernel.org/20251208062943.68824-1-sj@kernel.org/
Thanks,
SJ
[...]
* Re: [RFC PATCH v3 0/4] mm/damon: introduce perf event based access check
2026-04-23 4:34 ` [RFC PATCH v3 0/4] mm/damon: introduce " SeongJae Park
@ 2026-04-24 3:27 ` Akinobu Mita
2026-04-24 23:31 ` SeongJae Park
0 siblings, 1 reply; 14+ messages in thread
From: Akinobu Mita @ 2026-04-24 3:27 UTC (permalink / raw)
To: SeongJae Park; +Cc: damon, linux-perf-users
Hello SeongJae,
2026年4月23日(木) 13:34 SeongJae Park <sj@kernel.org>:
>
> Hello Akinobu,
>
> On Thu, 23 Apr 2026 09:42:06 +0900 Akinobu Mita <akinobu.mita@gmail.com> wrote:
>
> > DAMON currently only provides PTE accessed-bit based access check, this
> > patch series adds a new perf event based access check.
> >
> > Since perf event-based access checks do not require modifying the PTE
> > accessed-bit for pages representing each damon region, it reduces the
> > overhead of monitoring at a fixed granularity of the page size.
>
> As I also commented to the previous version, the high level idea makes sense to
> me. I think this can be useful.
>
> Also as I commented to the previous version, I understand the page level
> monitoring overhead reduction is the main purpose of this patch, and it makes
> sense to me. Nonetheless, I like this mainly because the perf event could
> provide more detailed information including from which CPU/thread the access is
> made, and whether the access is for read/write, to my understanding. Please
> let me know if my understanding is wrong.
That's correct.
Once the integration into your new monitoring infrastructure is
complete, those things will become possible.
> >
> > Here is a method and its results for comparing access checks using
> > existing access bits and new perf events.
> [...]
> > Using accessed bit, prepare_access_checks takes 7 seconds and
> > check_accesses takes 5 seconds.
> [...]
> > Using perf event, prepare_access_checks takes 0.01 seconds and
> > check_accesses takes 2.6 seconds.
>
> Thank you so much for sharing these great test results with the detailed setup
> description!
>
> > ```
> > $ sudo $HOME/damo/damo stop
> > $ sudo $HOME/damo/damo start --ops vaddr \
> > --perf_event 5000 0 0x4 0x1cd 0x1f 0 \
> > --monitoring_intervals 5s 60s 300s \
> > --monitoring_nr_regions_range 1000000000 1000000000 \
> > --target_pid $(cat /tmp/memcached.pid)
> [...]
> > Note: damo's --perf_event option
> >
> > Using these features also requires modifications to damo, but these
> > are not included in this patch series and are currently under
> > development in the following branch:
> >
> > * https://github.com/mita/damo/tree/damo-perf-for-v3.2.2
>
> Thank you for sharing this, too!
>
> >
> > The option newly added to damo for perf event-based access check has the
> > following format:
> >
> > `--perf_event <sample freq> <sample phys addr> <type> <config> <config1> <config2>`
>
> I think we may need to discuss more for final interface. But I think that
> could be done later, after the kernel ABI is fixed.
My current patch introduces a set of kernel ABIs (.../perf_events/<P>/*),
and if I want to control another field in the perf_event_attr,
I would have to add a new ABI. This would be a significant
maintenance cost.
Instead, I would like to provide a single bin file where the
perf_event_attr I want to set is written as binary.
> >
> > - `sample freq`: A higher frequency improves access accuracy, but also
> > increases overhead.
> > - `sample phys addr`: specify 0 for vaddr and 1 for paddr.
> > - The remaining type, config, config1, and config2 settings can be found
> > as follows:
>
> The values look a bit human-unfriendly. I wonder if we could use more
> human-friendly values, e.g., 'cpu' for type,
> 'mem_trans_retired.load_latency_gt_1024' for config, etc. Not necessarily we
> need to fix this right now. Let's keep discussing.
I'd like to do it that way. These processes can be handled better
from user space than from within the kernel, so I'd like to achieve
this from damo with the help of the perf tool and library.
> >
> > Run `perf mem record -vv` to obtain the log, and then find the part in
> > the log where `perf_event_open` was executed successfully.
> > ```
> > $ sudo perf mem record -vv echo > /tmp/perf-mem-record.log 2>&1
> > $ cat /tmp/perf-mem-record.log
> > ...
> > perf_event_attr:
> > type 4 (cpu)
> > size 144
> > config 0x1cd (mem_trans_retired.load_latency_gt_1024)
> [...]
> > ...
> > ```
> >
> > The values for `type` and `config` are taken from this output.
> > Use the values of `config1` and `config2` if they are included in the log;
> > otherwise, set them to 0.
>
> The above information will also be very useful for early testers of this patch
> series including myself. Thank you for sharing.
>
> >
> > Any feedback or advice on the patch set would be greatly appreciated.
>
> Again, I like the high level concept of this patch series. I definitely want
> this to be merged. Nonetheless, not exactly as it is right now. I think we
> should spend enough time for the user interface design. I hope the final
> interface is long-term maintainable and easy to extend.
>
> >
> > * RFC v3
> > - drop "reintroduce damon_operations->cleanup()" as no longer needed
> > - drop "allow user to set min size of region" (will be sent separately)
> > - add tgid to struct damon_access_report and use it instead of tid
> > - set perf_event_attr.precise_ip to 2
> > - prepare for the transition to report-based monitoring
> > - switch to use access_reported in struct damon_region
> > - switch to use sample/primitives/perf_events sysfs directory
>
> Thank you for doing this revisioning.
>
> For the user interface and also internal interface, I suggested to consider
> reusing those for page fault events based per-CPUs/threads/reads/writes
> monitoring DAMON extension patch series in our previous discussion. I think
> this version is following the right direction much more than the previous
> version.
>
> But, nowadays I'm thinking about a new user interface for the
> per-CPUs/threads/reads/writes monitoring that can easily also be extended for perf
> event based monitoring and for not only access monitoring but general and
> multiple data attributes monitoring.
>
> It is still under the development and I'm planning to share a very early RFC
> version of that by LSFMMBPF. I will try to add my idea about how this perf
> event based monitoring could also implemented by reusing the interface on the
> early RFC version.
>
> Could we wait until the early RFC version is published, review my idea about
> how this perf event based monitoring could reuse it, and continue discussions
> about the next steps of this patch series?
That sounds good. I'd definitely like to check it out.
> [1] https://lore.kernel.org/20251208062943.68824-1-sj@kernel.org/
>
>
> Thanks,
> SJ
>
> [...]
* Re: [RFC PATCH v3 0/4] mm/damon: introduce perf event based access check
2026-04-24 3:27 ` Akinobu Mita
@ 2026-04-24 23:31 ` SeongJae Park
2026-04-25 12:33 ` Akinobu Mita
0 siblings, 1 reply; 14+ messages in thread
From: SeongJae Park @ 2026-04-24 23:31 UTC (permalink / raw)
To: Akinobu Mita; +Cc: SeongJae Park, damon, linux-perf-users
On Fri, 24 Apr 2026 12:27:07 +0900 Akinobu Mita <akinobu.mita@gmail.com> wrote:
> Hello SeongJae,
>
> 2026年4月23日(木) 13:34 SeongJae Park <sj@kernel.org>:
> >
> > Hello Akinobu,
> >
> > On Thu, 23 Apr 2026 09:42:06 +0900 Akinobu Mita <akinobu.mita@gmail.com> wrote:
> >
> > > DAMON currently only provides PTE accessed-bit based access check, this
> > > patch series adds a new perf event based access check.
> > >
> > > Since perf event-based access checks do not require modifying the PTE
> > > accessed-bit for pages representing each damon region, it reduces the
> > > overhead of monitoring at a fixed granularity of the page size.
> >
> > As I also commented to the previous version, the high level idea makes sense to
> > me. I think this can be useful.
> >
> > Also as I commented to the previous version, I understand the page level
> > monitoring overhead reduction is the main purpose of this patch, and it makes
> > sense to me. Nonetheless, I like this mainly because the perf event could
> > provide more detailed information including from which CPU/thread the access is
> > made, and whether the access is for read/write, to my understanding. Please
> > let me know if my understanding is wrong.
>
> That's correct.
> Once the integration into your new monitoring infrastructure is
> complete, those things will become possible.
Thank you for confirming!
>
> > >
> > > Here is a method and its results for comparing access checks using
> > > existing access bits and new perf events.
> > [...]
> > > Using accessed bit, prepare_access_checks takes 7 seconds and
> > > check_accesses takes 5 seconds.
> > [...]
> > > Using perf event, prepare_access_checks takes 0.01 seconds and
> > > check_accesses takes 2.6 seconds.
> >
> > Thank you so much for sharing these great test results with the detailed setup
> > description!
> >
> > > ```
> > > $ sudo $HOME/damo/damo stop
> > > $ sudo $HOME/damo/damo start --ops vaddr \
> > > --perf_event 5000 0 0x4 0x1cd 0x1f 0 \
> > > --monitoring_intervals 5s 60s 300s \
> > > --monitoring_nr_regions_range 1000000000 1000000000 \
> > > --target_pid $(cat /tmp/memcached.pid)
> > [...]
> > > Note: damo's --perf_event option
> > >
> > > Using these features also requires modifications to damo, but these
> > > are not included in this patch series and are currently under
> > > development in the following branch:
> > >
> > > * https://github.com/mita/damo/tree/damo-perf-for-v3.2.2
> >
> > Thank you for sharing this, too!
> >
> > >
> > > The option newly added to damo for perf event-based access check has the
> > > following format:
> > >
> > > `--perf_event <sample freq> <sample phys addr> <type> <config> <config1> <config2>`
> >
> > I think we may need to discuss more for final interface. But I think that
> > could be done later, after the kernel ABI is fixed.
>
> My current patch introduces a set of kernel ABIs (.../perf_events/<P>/*),
> and if I want to control another field in the perf_event_attr,
> I would have to add a new ABI. This would be a significant
> maintenance cost.
>
> Instead, I would like to provide a single bin file where the
> perf_event_attr I want to set is written as binary.
I'd prefer having a single file per parameter, as sysfs is designed for. It
allows keeping the input format simple and therefore easy to maintain and
extend. Of course, we should be careful when adding new parameters, though.
>
> > >
> > > - `sample freq`: A higher frequency improves access accuracy, but also
> > > increases overhead.
> > > - `sample phys addr`: specify 0 for vaddr and 1 for paddr.
> > > - The remaining type, config, config1, and config2 settings can be found
> > > as follows:
> >
> > The values look a bit human-unfriendly. I wonder if we could use more
> > human-friendly values, e.g., 'cpu' for type,
> > 'mem_trans_retired.load_latency_gt_1024' for config, etc. Not necessarily we
> > need to fix this right now. Let's keep discussing.
>
> I'd like to do it that way. These processes can be handled better
> from user space than from within the kernel, so I'd like to achieve
> this from damo with the help of the perf tool and library.
Does existing perf event ABI also take that approach? If so, I think that is
fine.
>
> > >
> > > Run `perf mem record -vv` to obtain the log, and then find the part in
> > > the log where `perf_event_open` was executed successfully.
> > > ```
> > > $ sudo perf mem record -vv echo > /tmp/perf-mem-record.log 2>&1
> > > $ cat /tmp/perf-mem-record.log
> > > ...
> > > perf_event_attr:
> > > type 4 (cpu)
> > > size 144
> > > config 0x1cd (mem_trans_retired.load_latency_gt_1024)
> > [...]
> > > ...
> > > ```
> > >
> > > The values for `type` and `config` are taken from this output.
> > > Use the values of `config1` and `config2` if they are included in the log;
> > > otherwise, set them to 0.
> >
> > The above information will also be very useful for early testers of this patch
> > series including myself. Thank you for sharing.
> >
> > >
> > > Any feedback or advice on the patch set would be greatly appreciated.
> >
> > Again, I like the high level concept of this patch series. I definitely want
> > this to be merged. Nonetheless, not exactly as it is right now. I think we
> > should spend enough time for the user interface design. I hope the final
> > interface is long-term maintainable and easy to extend.
> >
> > >
> > > * RFC v3
> > > - drop "reintroduce damon_operations->cleanup()" as no longer needed
> > > - drop "allow user to set min size of region" (will be sent separately)
> > > - add tgid to struct damon_access_report and use it instead of tid
> > > - set perf_event_attr.precise_ip to 2
> > > - prepare for the transition to report-based monitoring
> > > - switch to use access_reported in struct damon_region
> > > - switch to use sample/primitives/perf_events sysfs directory
> >
> > Thank you for doing this revisioning.
> >
> > For the user interface and also internal interface, I suggested to consider
> > reusing those for page fault events based per-CPUs/threads/reads/writes
> > monitoring DAMON extension patch series in our previous discussion. I think
> > this version is following the right direction much more than the previous
> > version.
> >
> > But, nowadays I'm thinking about a new user interface for the
> > per-CPUs/threads/reads/writes monitoring that can easily also be extended for perf
> > event based monitoring and for not only access monitoring but general and
> > multiple data attributes monitoring.
> >
> > It is still under the development and I'm planning to share a very early RFC
> > version of that by LSFMMBPF. I will try to add my idea about how this perf
> > event based monitoring could also implemented by reusing the interface on the
> > early RFC version.
> >
> > Could we wait until the early RFC version is published, review my idea about
> > how this perf event based monitoring could reuse it, and continue discussions
> > about the next steps of this patch series?
>
> That sounds good. I'd definitely like to check it out.
Thank you for accepting my humble suggestion. And sorry for delaying you.
Hopefully this interface extension will be published soon, as it is one of
the topics for the DAMON session [1] at LSFMMBPF.
>
> > [1] https://lore.kernel.org/20251208062943.68824-1-sj@kernel.org/
[1] https://docs.google.com/spreadsheets/d/1mGEdDrWskp7Ua91jGXzquQGinorcD58DAVXhOiRp2Gg/edit?gid=1852749899#gid=1852749899
Thanks,
SJ
[...]
* Re: [RFC PATCH v3 0/4] mm/damon: introduce perf event based access check
2026-04-24 23:31 ` SeongJae Park
@ 2026-04-25 12:33 ` Akinobu Mita
2026-04-25 15:33 ` SeongJae Park
0 siblings, 1 reply; 14+ messages in thread
From: Akinobu Mita @ 2026-04-25 12:33 UTC (permalink / raw)
To: SeongJae Park; +Cc: damon, linux-perf-users
2026年4月25日(土) 8:31 SeongJae Park <sj@kernel.org>:
>
> On Fri, 24 Apr 2026 12:27:07 +0900 Akinobu Mita <akinobu.mita@gmail.com> wrote:
>
> > Hello SeongJae,
> >
> > 2026年4月23日(木) 13:34 SeongJae Park <sj@kernel.org>:
> > >
> > > Hello Akinobu,
> > >
> > > On Thu, 23 Apr 2026 09:42:06 +0900 Akinobu Mita <akinobu.mita@gmail.com> wrote:
> > >
> > > > DAMON currently only provides PTE accessed-bit based access check, this
> > > > patch series adds a new perf event based access check.
> > > >
> > > > Since perf event-based access checks do not require modifying the PTE
> > > > accessed-bit for pages representing each damon region, it reduces the
> > > > overhead of monitoring at a fixed granularity of the page size.
> > >
> > > As I also commented to the previous version, the high level idea makes sense to
> > > me. I think this can be useful.
> > >
> > > Also as I commented to the previous version, I understand the page level
> > > monitoring overhead reduction is the main purpose of this patch, and it makes
> > > sense to me. Nonetheless, I like this mainly because the perf event could
> > > provide more detailed information including from which CPU/thread the access is
> > > made, and whether the access is for read/write, to my understanding. Please
> > > let me know if my understanding is wrong.
> >
> > That's correct.
> > Once the integration into your new monitoring infrastructure is
> > complete, those things will become possible.
>
> Thank you for confirming!
>
> >
> > > >
> > > > Here is a method and its results for comparing access checks using
> > > > existing access bits and new perf events.
> > > [...]
> > > > Using accessed bit, prepare_access_checks takes 7 seconds and
> > > > check_accesses takes 5 seconds.
> > > [...]
> > > > Using perf event, prepare_access_checks takes 0.01 seconds and
> > > > check_accesses takes 2.6 seconds.
> > >
> > > Thank you so much for sharing these great test results with the detailed setup
> > > description!
> > >
> > > > ```
> > > > $ sudo $HOME/damo/damo stop
> > > > $ sudo $HOME/damo/damo start --ops vaddr \
> > > > --perf_event 5000 0 0x4 0x1cd 0x1f 0 \
> > > > --monitoring_intervals 5s 60s 300s \
> > > > --monitoring_nr_regions_range 1000000000 1000000000 \
> > > > --target_pid $(cat /tmp/memcached.pid)
> > > [...]
> > > > Note: damo's --perf_event option
> > > >
> > > > Using these features also requires modifications to damo, but these
> > > > are not included in this patch series and are currently under
> > > > development in the following branch:
> > > >
> > > > * https://github.com/mita/damo/tree/damo-perf-for-v3.2.2
> > >
> > > Thank you for sharing this, too!
> > >
> > > >
> > > > The option newly added to damo for perf event-based access check has the
> > > > following format:
> > > >
> > > > `--perf_event <sample freq> <sample phys addr> <type> <config> <config1> <config2>`
> > >
> > > I think we may need to discuss the final interface more. But I think that
> > > could be done later, after the kernel ABI is fixed.
> >
> > My current patch introduces a set of kernel ABIs (.../perf_events/<P>/*),
> > and if I want to control another field in the perf_event_attr,
> > I would have to add a new ABI. This would be a significant
> > maintenance cost.
> >
> > Instead, I would like to provide a single bin file where the
> > perf_event_attr I want to set is written as binary.
>
> I'd prefer having a single file per parameter, as sysfs is designed for. It
> allows keeping the input format simple and therefore easy to maintain and
> extend. Of course, we should be careful when adding new parameters, though.
Another idea, from Namhyung Kim [1], is to fix the events to those
specified by the CPU architecture being executed.
Of course, we would want to control a limited number of
parameters, such as sample_freq.
[1] https://lore.kernel.org/damon/aZwLDAxf9eP0lPdD@z2/
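
Just to make the single-bin-file idea above concrete, below is a minimal
sketch (not part of this series; the bin file itself is hypothetical) of how
user space could assemble such a binary perf_event_attr. It packs the
64-byte VER0 prefix of struct perf_event_attr from
include/uapi/linux/perf_event.h, reusing the type/config/config1 values from
the damo example in the cover letter, with sample_freq as the tunable
parameter:

import struct

# Sketch only: pack the 64-byte VER0 prefix of struct perf_event_attr.
# The single-bin-file sysfs interface itself is hypothetical.
PERF_ATTR_SIZE_VER0 = 64
PERF_TYPE_RAW = 4
PERF_SAMPLE_ADDR = 1 << 3   # sample the accessed data address
ATTR_FLAG_FREQ = 1 << 10    # treat the period field as a sampling frequency

def pack_attr(sample_freq, config, config1=0):
    # "=IIQQQQQIIQ" covers type, size, config, sample_period/sample_freq,
    # sample_type, read_format, the flag bit-field word, wakeup_events,
    # bp_type and bp_addr/config1 -- exactly 64 bytes, no padding.
    return struct.pack("=IIQQQQQIIQ",
                       PERF_TYPE_RAW, PERF_ATTR_SIZE_VER0, config,
                       sample_freq, PERF_SAMPLE_ADDR, 0,
                       ATTR_FLAG_FREQ, 0, 0, config1)

# 64-byte blob that would be written to the hypothetical bin file.
blob = pack_attr(sample_freq=5000, config=0x1cd, config1=0x1f)
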
> >
> > > >
> > > > - `sample freq`: A higher frequency improves access accuracy, but also
> > > > increases overhead.
> > > > - `sample phys addr`: specify 0 for vaddr and 1 for paddr.
> > > > - The remaining type, config, config1, and config2 settings can be found
> > > > as follows:
> > >
> > > The values look a bit human-unfriendly. I wonder if we could use more
> > > human-friendly values, e.g., 'cpu' for type,
> > > 'mem_trans_retired.load_latency_gt_1024' for config, etc. We don't
> > > necessarily need to fix this right now. Let's keep discussing.
> >
> > I'd like to do it that way. These processes can be handled better
> > from user space than from within the kernel, so I'd like to achieve
> > this from damo with the help of the perf tool and library.
>
> Does existing perf event ABI also take that approach? If so, I think that is
> fine.
On Intel CPUs, you can read the mem-loads and/or mem-stores
files as follows to find the value that should be set to
perf_event_attr:
$ cat /sys/bus/event_source/devices/cpu/events/mem-loads
event=0xcd,umask=0x1,ldlat=3
The corresponding files in the format directory tell you which
bit field of which perf_event_attr member each of these values
should be set in.
$ cat /sys/bus/event_source/devices/cpu/format/{event,umask,ldlat}
config:0-7
config:8-15
config1:0-15
This conversion from symbolic event name to values that should
be set in perf_event_attr can be done, for example, as shown below,
so I think it should be implemented using perf in some way.
import perf

if __name__ == '__main__':
    evlist = perf.parse_events("mem-loads")
    for evsel in evlist:
        print(f"{evsel}: type={evsel.type} config={evsel.config}")
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [RFC PATCH v3 0/4] mm/damon: introduce perf event based access check
2026-04-25 12:33 ` Akinobu Mita
@ 2026-04-25 15:33 ` SeongJae Park
0 siblings, 0 replies; 14+ messages in thread
From: SeongJae Park @ 2026-04-25 15:33 UTC (permalink / raw)
To: Akinobu Mita; +Cc: SeongJae Park, damon, linux-perf-users
On Sat, 25 Apr 2026 21:33:07 +0900 Akinobu Mita <akinobu.mita@gmail.com> wrote:
> On Sat, Apr 25, 2026 at 8:31 SeongJae Park <sj@kernel.org>:
> >
> > On Fri, 24 Apr 2026 12:27:07 +0900 Akinobu Mita <akinobu.mita@gmail.com> wrote:
> >
> > > Hello SeongJae,
> > >
> > > On Thu, Apr 23, 2026 at 13:34 SeongJae Park <sj@kernel.org>:
[...]
> > > My current patch introduces a set of kernel ABIs (.../perf_events/<P>/*),
> > > and if I want to control another field in the perf_event_attr,
> > > I would have to add a new ABI. This would be a significant
> > > maintenance cost.
> > >
> > > Instead, I would like to provide a single bin file where the
> > > perf_event_attr I want to set is written as binary.
> >
> > I'd prefer having a single file per parameter, as sysfs is designed for. It
> > allows keeping the input format simple and therefore easy to maintain and
> > extend. Of course, we should be careful when adding new parameters, though.
>
> Another idea, from Namhyung Kim [1], is to fix the events to those
> specified by the CPU architecture being executed.
I think I need to study more about perf events :)
>
> Of course, we would want to control a limited number of
> parameters, such as sample_freq.
>
> [1] https://lore.kernel.org/damon/aZwLDAxf9eP0lPdD@z2/
>
> > >
> > > > >
> > > > > - `sample freq`: A higher frequency improves access accuracy, but also
> > > > > increases overhead.
> > > > > - `sample phys addr`: specify 0 for vaddr and 1 for paddr.
> > > > > - The remaining type, config, config1, and config2 settings can be found
> > > > > as follows:
> > > >
> > > > The values look a bit human-unfriendly. I wonder if we could use more
> > > > human-friendly values, e.g., 'cpu' for type,
> > > > 'mem_trans_retired.load_latency_gt_1024' for config, etc. We don't
> > > > necessarily need to fix this right now. Let's keep discussing.
> > >
> > > I'd like to do it that way. These processes can be handled better
> > > from user space than from within the kernel, so I'd like to achieve
> > > this from damo with the help of the perf tool and library.
> >
> > Does existing perf event ABI also take that approach? If so, I think that is
> > fine.
>
> On Intel CPUs, you can read the mem-loads and/or mem-stores
> files as follows to find the value that should be set to
> perf_event_attr:
>
> $ cat /sys/bus/event_source/devices/cpu/events/mem-loads
> event=0xcd,umask=0x1,ldlat=3
>
> The corresponding files in the format directory tell you which
> bit field of which perf_event_attr member each of these values
> should be set in.
>
> $ cat /sys/bus/event_source/devices/cpu/format/{event,umask,ldlat}
> config:0-7
> config:8-15
> config1:0-15
>
> This conversion from symbolic event name to values that should
> be set in perf_event_attr can be done, for example, as shown below,
> so I think it should be implemented using perf in some way.
>
> import perf
>
> if __name__ == '__main__':
>     evlist = perf.parse_events("mem-loads")
>     for evsel in evlist:
>         print(f"{evsel}: type={evsel.type} config={evsel.config}")
I definitely need to study more about perf events. I will comment after I
learn some fundamentals. But I believe this is not a blocker for this work.
Thanks,
SJ
^ permalink raw reply [flat|nested] 14+ messages in thread