* [RFC IDEA v2 1/6] mm/damon: implement DAMOS actions for access-aware contiguous memory allocation
2024-05-12 19:36 [RFC IDEA v2 0/6] mm/damon: introduce Access/Contiguity-aware Memory Auto-scaling (ACMA) SeongJae Park
@ 2024-05-12 19:36 ` SeongJae Park
2024-05-12 19:36 ` [RFC IDEA v2 2/6] mm/damon: add the initial part of access/contiguity-aware memory auto-scaling module SeongJae Park
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: SeongJae Park @ 2024-05-12 19:36 UTC (permalink / raw)
Cc: SeongJae Park, damon, linux-mm, linux-kernel
Implement two DAMOS actions, namely DAMOS_ALLOC and DAMOS_FREE. As the
names say, the actions allocate/de-allocate the given DAMOS target memory in
a user-defined base granularity.
Signed-off-by: SeongJae Park <sj@kernel.org>
---
include/linux/damon.h | 37 ++++++++++++++++
mm/damon/paddr.c | 93 ++++++++++++++++++++++++++++++++++++++++
mm/damon/sysfs-schemes.c | 4 ++
3 files changed, 134 insertions(+)
diff --git a/include/linux/damon.h b/include/linux/damon.h
index 0c3f93374e8d..933bc7777f2d 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -107,6 +107,11 @@ struct damon_target {
* @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists.
* @DAMOS_MIGRATE_HOT: Migrate the regions prioritizing warmer regions.
* @DAMOS_MIGRATE_COLD: Migrate the regions prioritizing colder regions.
+#ifdef CONFIG_ACMA
+ * @DAMOS_ALLOC: Allocate pages in the region,
+ * &struct damos->alloc_order pages at once.
+ * @DAMOS_FREE: Return DAMOS_ALLOC-ed pages back to the system.
+#endif
* @DAMOS_STAT: Do nothing but count the stat.
* @NR_DAMOS_ACTIONS: Total number of DAMOS actions
*
@@ -126,6 +131,10 @@ enum damos_action {
DAMOS_LRU_DEPRIO,
DAMOS_MIGRATE_HOT,
DAMOS_MIGRATE_COLD,
+#ifdef CONFIG_ACMA
+ DAMOS_ALLOC,
+ DAMOS_FREE,
+#endif
DAMOS_STAT, /* Do nothing but only record the stat */
NR_DAMOS_ACTIONS,
};
@@ -375,6 +384,11 @@ struct damos_access_pattern {
* struct damos - Represents a Data Access Monitoring-based Operation Scheme.
* @pattern: Access pattern of target regions.
* @action: &damo_action to be applied to the target regions.
+#ifdef CONFIG_ACMA
+ * @alloc_order: DAMOS_ALLOC/FREE applying granularity.
+ * @alloc_callback: DAMOS_ALLOC success callback.
+ * @free_callback: DAMOS_FREE callback.
+#endif
* @apply_interval_us: The time between applying the @action.
* @quota: Control the aggressiveness of this scheme.
* @wmarks: Watermarks for automated (in)activation of this scheme.
@@ -388,6 +402,18 @@ struct damos_access_pattern {
* CPU time or IO resources for the &action, &quota is used.
*
* If @apply_interval_us is zero, &damon_attrs->aggr_interval is used instead.
+#ifdef CONFIG_ACMA
+ *
+ * If @action is DAMOS_ALLOC or DAMOS_FREE, the action is applied to
+ * @alloc_order pages of the region at once. For example, if the region has
+ * 1024 pages, and @alloc_order is 9, DAMOS tries to allocate or free first 512
+ * (2^9) contiguous pages at once, and then next 512 pages.
+ *
+ * For each success of such allocation attempt, @alloc_callback is called back.
+ * For each attempt of deallocation, @free_callback is called back first,
+ * before trying the deallocation. If @free_callback returns non-zero, the
+ * deallocation attempt is aborted.
+#endif
*
* To do the work only when needed, schemes can be activated for specific
* system situations using &wmarks. If all schemes that registered to the
@@ -409,6 +435,11 @@ struct damos_access_pattern {
struct damos {
struct damos_access_pattern pattern;
enum damos_action action;
+#ifdef CONFIG_ACMA
+ unsigned int alloc_order;
+ int (*alloc_callback)(unsigned long start_addr);
+ int (*free_callback)(unsigned long start_addr);
+#endif
unsigned long apply_interval_us;
/* private: internal use only */
/*
@@ -784,6 +815,12 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
int damon_set_region_biggest_system_ram_default(struct damon_target *t,
unsigned long *start, unsigned long *end);
+#ifdef CONFIG_ACMA
+
+unsigned long damon_alloced_bytes(void);
+
+#endif
+
#endif /* CONFIG_DAMON */
#endif /* _DAMON_H */
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 81163206e70c..f66bd032c523 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -475,6 +475,93 @@ static unsigned long damon_pa_migrate(struct damon_region *r, struct damos *s)
}
+#ifdef CONFIG_ACMA
+
+/*
+ * Tell whether the chunk of pages starting at @pfn is currently preempted
+ * (i.e., taken from the system by damon_pa_alloc()).
+ *
+ * TODO: implement the real bookkeeping.  Until then, explicitly report
+ * "not preempted" so that callers never consume an indeterminate value from a
+ * non-void function that falls off its end.
+ */
+static bool damon_pa_preempted(unsigned long pfn)
+{
+	return false;
+}
+
+/*
+ * Mark (@preempted == true) or unmark (@preempted == false) the chunk of pages
+ * starting at @pfn as preempted.
+ *
+ * Must always succeed for @preempted == false; damon_pa_free() ignores the
+ * return value in that case.
+ *
+ * TODO: implement the real bookkeeping.  Until then, explicitly return success
+ * so that callers never consume an indeterminate value from a non-void
+ * function that falls off its end.
+ */
+static int damon_pa_set_preempted(unsigned long pfn, bool preempted)
+{
+	return 0;
+}
+
+/*
+ * Return ownership of the memory to the system.  At the moment, the only user
+ * of this function is virtio-balloon.  It could use page fault-based
+ * mechanisms to catch the returned ownership.  Therefore this function doesn't
+ * notify this event to the report subscribers.  In future, we could add some
+ * notification system of this event for more users such as a contig memory
+ * allocator.
+ *
+ * Returns 0 on success, or -EINVAL if the chunk starting at @pfn is not marked
+ * as preempted.
+ *
+ * NOTE(review): @scheme is unused here.  In particular, scheme->free_callback
+ * is not invoked, although the kernel-doc of struct damos says it is called
+ * back before each deallocation attempt -- confirm which one is intended.
+ */
+static int damon_pa_free(unsigned long pfn, struct damos *scheme)
+{
+	if (!damon_pa_preempted(pfn))
+		return -EINVAL;
+
+	free_contig_range(pfn, DAMON_MEM_PREEMPT_PAGES);
+	/* clearing the mark is required to always succeed */
+	damon_pa_set_preempted(pfn, false);
+	/*
+	 * We intentionally do not report this event to the preempted memory
+	 * report subscriber.  They could use page fault handler like
+	 * mechanisms.
+	 */
+	return 0;
+}
+
+/*
+ * Pass ownership of the memory to page reporting subscribers.  The subscribers
+ * can use the reported memory for their purpose, e.g., letting Host
+ * re-allocate it to other guest, or use as contig allocation memory pool.
+ *
+ * Returns 0 on success, -EINVAL if the chunk starting at @pfn is already
+ * preempted, -ENOMEM if the contiguous allocation fails, or the error that
+ * damon_pa_set_preempted() or scheme->alloc_callback returned.  On any
+ * failure, the pages are given back to the system.
+ */
+static int damon_pa_alloc(unsigned long pfn, struct damos *scheme)
+{
+	int err;
+
+	if (damon_pa_preempted(pfn))
+		return -EINVAL;
+	if (alloc_contig_range(pfn, pfn + DAMON_MEM_PREEMPT_PAGES,
+				MIGRATE_MOVABLE, GFP_KERNEL))
+		return -ENOMEM;
+	err = damon_pa_set_preempted(pfn, true);
+	if (err) {
+		/* not yet marked, so release the range directly */
+		free_contig_range(pfn, DAMON_MEM_PREEMPT_PAGES);
+		return err;
+	}
+	if (!scheme->alloc_callback)
+		return 0;
+	err = scheme->alloc_callback(PFN_PHYS(pfn));
+	if (err) {
+		/* the subscriber rejected the chunk; undo mark and allocation */
+		damon_pa_free(pfn, scheme);
+		return err;
+	}
+	return 0;
+}
+
+/*
+ * Preempt memory of the region from the system (@alloc == true), or yield it
+ * back (@alloc == false), in DAMON_MEM_PREEMPT_PAGES steps.
+ *
+ * Returns the number of bytes for which the operation succeeded.
+ */
+static unsigned long damon_pa_alloc_or_free(
+		struct damon_region *r, struct damos *s, bool alloc)
+{
+	const unsigned long end_pfn = PHYS_PFN(r->end);
+	unsigned long cur = PHYS_PFN(r->start);
+	unsigned long nr_done = 0;
+
+	while (cur < end_pfn) {
+		int err = alloc ? damon_pa_alloc(cur, s) :
+			damon_pa_free(cur, s);
+
+		/* failed chunks are skipped and simply not counted */
+		if (!err)
+			nr_done++;
+		cur += DAMON_MEM_PREEMPT_PAGES;
+	}
+	return nr_done * PAGE_SIZE * DAMON_MEM_PREEMPT_PAGES;
+}
+
+#endif
+
static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx,
struct damon_target *t, struct damon_region *r,
struct damos *scheme)
@@ -489,6 +576,12 @@ static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx,
case DAMOS_MIGRATE_HOT:
case DAMOS_MIGRATE_COLD:
return damon_pa_migrate(r, scheme);
+#ifdef CONFIG_ACMA
+ case DAMOS_ALLOC:
+ return damon_pa_alloc_or_free(r, scheme, true);
+ case DAMOS_FREE:
+ return damon_pa_alloc_or_free(r, scheme, false);
+#endif
case DAMOS_STAT:
break;
default:
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 66fccfa776d7..54be4d661881 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -1460,6 +1460,10 @@ static const char * const damon_sysfs_damos_action_strs[] = {
"lru_deprio",
"migrate_hot",
"migrate_cold",
+#ifdef CONFIG_ACMA
+ "damos_alloc",
+ "damos_free",
+#endif
"stat",
};
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [RFC IDEA v2 2/6] mm/damon: add the initial part of access/contiguity-aware memory auto-scaling module
2024-05-12 19:36 [RFC IDEA v2 0/6] mm/damon: introduce Access/Contiguity-aware Memory Auto-scaling (ACMA) SeongJae Park
2024-05-12 19:36 ` [RFC IDEA v2 1/6] mm/damon: implement DAMOS actions for access-aware contiguous memory allocation SeongJae Park
@ 2024-05-12 19:36 ` SeongJae Park
2024-05-12 19:36 ` [RFC IDEA v2 3/6] mm/page_reporting: implement a function for reporting specific pfn range SeongJae Park
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: SeongJae Park @ 2024-05-12 19:36 UTC (permalink / raw)
Cc: SeongJae Park, linux-kernel, damon, linux-mm
Start adding a DAMON application module for access/contiguity-aware
memory auto-scaling. The module does proactive reclamation, scale-down,
and scale-up of memory under user-defined min/max memory and acceptable
level of memory pressure using three DAMOS schemes each designed for
each of the three main operations. Nonetheless, this is only the
initial part of the implementation. Hence this commit implements only
the memory pressure-aware auto-tuning proactive reclamation feature.
Following commits will implement scale down and up, respectively.
Signed-off-by: SeongJae Park <sj@kernel.org>
---
mm/damon/Kconfig | 10 ++
mm/damon/Makefile | 1 +
mm/damon/acma.c | 335 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 346 insertions(+)
create mode 100644 mm/damon/acma.c
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index fecb8172410c..4fe7520601dd 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -121,4 +121,14 @@ config DAMON_LRU_SORT
protect frequently accessed (hot) pages while rarely accessed (cold)
pages reclaimed first under memory pressure.
+config DAMON_ACMA
+ bool "Build Access/Contiguity-aware Memory Auto-scaling (DAMON_ACMA)"
+ depends on DAMON_PADDR
+ help
+ This builds the DAMON-based Access/Contiguity-aware Memory
+ Auto-scaling subsystem. It preempts unnecessary memory from the
+ system and reports it to the host while respecting user-specified
+ min/max memory for the system and maximum memory pressure stall time
+ ratio.
+
endmenu
diff --git a/mm/damon/Makefile b/mm/damon/Makefile
index f7add3f4aa79..814c8da3081b 100644
--- a/mm/damon/Makefile
+++ b/mm/damon/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_DAMON_SYSFS) += sysfs-common.o sysfs-schemes.o sysfs.o
obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o
obj-$(CONFIG_DAMON_RECLAIM) += modules-common.o reclaim.o
obj-$(CONFIG_DAMON_LRU_SORT) += modules-common.o lru_sort.o
+obj-$(CONFIG_DAMON_ACMA) += modules-common.o acma.o
diff --git a/mm/damon/acma.c b/mm/damon/acma.c
new file mode 100644
index 000000000000..276b61fd4e26
--- /dev/null
+++ b/mm/damon/acma.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON-based Access/Contiguity-aware Memory Auto-scaling
+ *
+ * Lets users specify the min/max memory of the system and an acceptable level
+ * of memory pressure stall. While respecting those, automatically scales the
+ * memory of the system down by preempting memory from the system and reporting
+ * it to the host when the memory pressure level is under the threshold, and
+ * scales it up in the opposite case.
+ *
+ * At this moment, the scaling is not implemented, hence this is just a memory
+ * pressure-aware proactive reclamation module.
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define pr_fmt(fmt) "damon-acma: " fmt
+
+#include <linux/damon.h>
+#include <linux/kstrtox.h>
+#include <linux/module.h>
+
+#include "modules-common.h"
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "damon_acma."
+
+/*
+ * Enable or disable DAMON_ACMA.
+ *
+ * You can enable DAMON_ACMA by setting the value of this parameter as ``Y``.
+ * Setting it as ``N`` disables DAMON_ACMA. Note that DAMON_ACMA could do no
+ * real monitoring and memory auto-scaling due to the watermarks-based
+ * activation condition. Refer to below descriptions for the watermarks
+ * parameter for this.
+ */
+static bool enabled __read_mostly;
+
+/*
+ * Make DAMON_ACMA read the input parameters again, except ``enabled``.
+ *
+ * Input parameters that are updated while DAMON_ACMA is running are not
+ * applied by default. Once this parameter is set as ``Y``, DAMON_ACMA
+ * reads values of parameters except ``enabled`` again. Once the re-reading is
+ * done, this parameter is set as ``N``. If invalid parameters are found during
+ * the re-reading, DAMON_ACMA will be disabled.
+ */
+static bool commit_inputs __read_mostly;
+module_param(commit_inputs, bool, 0600);
+
+/*
+ * Desired level of memory pressure-stall time in microseconds.
+ *
+ * While keeping the caps set by other quotas, DAMON_ACMA automatically
+ * increases and decreases the effective level of the quota, aiming for this
+ * level of memory pressure. System-wide ``some`` memory PSI in microseconds
+ * per quota reset interval (``quota_reset_interval_ms``) is collected and
+ * compared to this value to see if the aim is satisfied. Value zero means
+ * disabling this auto-tuning feature.
+ *
+ * 1 ms/ 1 second (0.1%) by default. Inspired by the PSI threshold of TMO
+ * (https://dl.acm.org/doi/10.1145/3503222.3507731).
+ */
+static unsigned long quota_mem_pressure_us __read_mostly = 1000;
+module_param(quota_mem_pressure_us, ulong, 0600);
+
+static struct damos_quota damon_acma_quota = {
+ /* Use up to 15 ms per 1 sec for scaling, by default */
+ .ms = 15,
+ .sz = 0,
+ .reset_interval = 1000,
+ /* Within the quota, mark hotter regions accessed first. */
+ .weight_sz = 0,
+ .weight_nr_accesses = 1,
+ .weight_age = 0,
+};
+DEFINE_DAMON_MODULES_DAMOS_TIME_QUOTA(damon_acma_quota);
+
+static struct damos_watermarks damon_acma_wmarks = {
+ .metric = DAMOS_WMARK_NONE,
+};
+
+static struct damon_attrs damon_acma_mon_attrs = {
+ .sample_interval = 1000000, /* 1 second */
+ .aggr_interval = 20000000, /* 20 seconds */
+ .ops_update_interval = 0,
+ .min_nr_regions = 10,
+ .max_nr_regions = 1000,
+};
+DEFINE_DAMON_MODULES_MON_ATTRS_PARAMS(damon_acma_mon_attrs);
+
+/*
+ * Start of the target memory region in physical address.
+ *
+ * The start physical address of memory region that DAMON_ACMA will do work
+ * against. By default, biggest System RAM is used as the region.
+ */
+static unsigned long monitor_region_start __read_mostly;
+module_param(monitor_region_start, ulong, 0600);
+
+/*
+ * End of the target memory region in physical address.
+ *
+ * The end physical address of memory region that DAMON_ACMA will do work
+ * against. By default, biggest System RAM is used as the region.
+ */
+static unsigned long monitor_region_end __read_mostly;
+module_param(monitor_region_end, ulong, 0600);
+
+/*
+ * PID of the DAMON thread
+ *
+ * If DAMON_ACMA is enabled, this becomes the PID of the worker thread.
+ * Else, -1.
+ */
+static int kdamond_pid __read_mostly = -1;
+module_param(kdamond_pid, int, 0400);
+
+static struct damos_stat damon_acma_reclaim_stat;
+DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_acma_reclaim_stat,
+ acma_reclaim_tried_regions, acma_reclaim_succ_regions,
+ acma_reclaim_quota_exceeds);
+
+static struct damos_access_pattern damon_acma_stub_pattern = {
+ /* Find regions having PAGE_SIZE or larger size */
+ .min_sz_region = PAGE_SIZE,
+ .max_sz_region = ULONG_MAX,
+ /* no matter its access frequency */
+ .min_nr_accesses = 0,
+ .max_nr_accesses = UINT_MAX,
+ /* no matter its age */
+ .min_age_region = 0,
+ .max_age_region = UINT_MAX,
+};
+
+static struct damon_ctx *ctx;
+static struct damon_target *target;
+
+/*
+ * Create a scheme that applies @action to regions matching @pattern, using a
+ * private copy of the module's quota and the module's watermarks.
+ *
+ * Returns whatever damon_new_scheme() returns; callers treat NULL as failure.
+ */
+static struct damos *damon_acma_new_scheme(
+		struct damos_access_pattern *pattern, enum damos_action action)
+{
+	struct damos_quota quota = damon_acma_quota;
+
+	return damon_new_scheme(
+			pattern,
+			action,
+			/* work for every second */
+			1000000,
+			/* under the quota. */
+			&quota,
+			/* (De)activate this according to the watermarks. */
+			&damon_acma_wmarks);
+}
+
+/*
+ * Carry the charge-tracking fields of the quota @src over to @dst.  Used when
+ * a scheme is replaced by a freshly built one.
+ */
+static void damon_acma_copy_quota_status(struct damos_quota *dst,
+		struct damos_quota *src)
+{
+	/* totals */
+	dst->total_charged_ns = src->total_charged_ns;
+	dst->total_charged_sz = src->total_charged_sz;
+
+	/* charge of the current window */
+	dst->charged_from = src->charged_from;
+	dst->charged_sz = src->charged_sz;
+
+	/* position to continue charging from */
+	dst->charge_addr_from = src->charge_addr_from;
+	dst->charge_target_from = src->charge_target_from;
+}
+
+/*
+ * Finish the quota setup of @scheme.
+ *
+ * If @old is given, its quota charging status is inherited.  Then a quota goal
+ * of @goal_metric targeting quota_mem_pressure_us is added to the quota of
+ * @scheme.
+ *
+ * Returns 0 on success, or -ENOMEM if the goal cannot be allocated.
+ */
+static int damon_acma_set_scheme_quota(struct damos *scheme, struct damos *old,
+		enum damos_quota_goal_metric goal_metric)
+{
+	struct damos_quota_goal *goal;
+
+	if (old)
+		damon_acma_copy_quota_status(&scheme->quota, &old->quota);
+	goal = damos_new_quota_goal(goal_metric, quota_mem_pressure_us);
+	if (!goal)
+		return -ENOMEM;
+	damos_add_quota_goal(&scheme->quota, goal);
+	return 0;
+}
+
+/*
+ * Build the proactive reclamation scheme: DAMOS_PAGEOUT for regions of the
+ * stub pattern that showed no access at all, auto-tuned with the "some" memory
+ * PSI quota goal.  @old, if not NULL, donates its quota charging status.
+ *
+ * Returns the new scheme, or NULL on failure.
+ */
+static struct damos *damon_acma_new_reclaim_scheme(struct damos *old)
+{
+	struct damos_access_pattern cold = damon_acma_stub_pattern;
+	struct damos *s;
+
+	/* only regions with zero observed accesses */
+	cold.max_nr_accesses = 0;
+
+	s = damon_acma_new_scheme(&cold, DAMOS_PAGEOUT);
+	if (!s)
+		return NULL;
+
+	if (damon_acma_set_scheme_quota(s, old, DAMOS_QUOTA_SOME_MEM_PSI_US)) {
+		damon_destroy_scheme(s);
+		return NULL;
+	}
+	return s;
+}
+
+/*
+ * Apply the current module parameters to the DAMON context.
+ *
+ * Sets the monitoring attributes, replaces the installed scheme with a newly
+ * built reclaim scheme (inheriting the quota charging status of the scheme
+ * that was installed, if any), and sets the monitoring target region.
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+static int damon_acma_apply_parameters(void)
+{
+	struct damos *scheme, *reclaim_scheme;
+	struct damos *old_reclaim_scheme = NULL;
+	int err;
+
+	err = damon_set_attrs(ctx, &damon_acma_mon_attrs);
+	if (err)
+		return err;
+
+	/* remember the last installed scheme, if any */
+	damon_for_each_scheme(scheme, ctx)
+		old_reclaim_scheme = scheme;
+
+	reclaim_scheme = damon_acma_new_reclaim_scheme(old_reclaim_scheme);
+	if (!reclaim_scheme)
+		return -ENOMEM;
+	damon_set_schemes(ctx, &reclaim_scheme, 1);
+
+	return damon_set_region_biggest_system_ram_default(target,
+					&monitor_region_start,
+					&monitor_region_end);
+}
+
+/*
+ * Start (@on == true) or stop (@on == false) the DAMON context of this module
+ * and keep the kdamond_pid parameter in sync.
+ *
+ * Returns 0 on success, or the error of the failed step.
+ */
+static int damon_acma_turn(bool on)
+{
+	int err;
+
+	if (on) {
+		err = damon_acma_apply_parameters();
+		if (!err)
+			err = damon_start(&ctx, 1, true);
+		if (err)
+			return err;
+		kdamond_pid = ctx->kdamond->pid;
+		return 0;
+	}
+
+	err = damon_stop(&ctx, 1);
+	if (!err)
+		kdamond_pid = -1;
+	return err;
+}
+
+/*
+ * Setter of the 'enabled' parameter.
+ *
+ * Parses @val as a boolean and, if it differs from the current state, turns
+ * DAMON_ACMA on or off accordingly.  When called before the init function has
+ * created the context, only the value is recorded.
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+static int damon_acma_enabled_store(const char *val,
+		const struct kernel_param *kp)
+{
+	bool was_enabled = enabled;
+	bool want;
+	int err;
+
+	err = kstrtobool(val, &want);
+	if (err)
+		return err;
+
+	if (want == was_enabled)
+		return 0;
+
+	/* Without ctx we are before init; the init function handles it. */
+	if (ctx) {
+		err = damon_acma_turn(want);
+		if (err)
+			return err;
+	}
+
+	enabled = want;
+	return err;
+}
+
+static const struct kernel_param_ops enabled_param_ops = {
+ .set = damon_acma_enabled_store,
+ .get = param_get_bool,
+};
+
+module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
+MODULE_PARM_DESC(enabled,
+ "Enable or disable DAMON_ACMA (default: disabled)");
+
+/*
+ * If the user requested it via 'commit_inputs', re-apply the parameters and
+ * clear the request.  Returns the result of the re-application, or 0 if there
+ * was no request.
+ */
+static int damon_acma_handle_commit_inputs(void)
+{
+	int err = 0;
+
+	if (commit_inputs) {
+		err = damon_acma_apply_parameters();
+		/* the request is consumed regardless of the result */
+		commit_inputs = false;
+	}
+	return err;
+}
+
+/*
+ * DAMON callback invoked after each aggregation.
+ *
+ * Copies the stat of the reclaim scheme (the one doing DAMOS_PAGEOUT) to the
+ * stats parameter, then handles a pending 'commit_inputs' request.
+ *
+ * Returns the result of damon_acma_handle_commit_inputs().
+ */
+static int damon_acma_after_aggregation(struct damon_ctx *c)
+{
+	struct damos *s;
+
+	/* update the stats parameter */
+	damon_for_each_scheme(s, c) {
+		switch (s->action) {
+		/* the reclaim scheme is created with DAMOS_PAGEOUT */
+		case DAMOS_PAGEOUT:
+			damon_acma_reclaim_stat = s->stat;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return damon_acma_handle_commit_inputs();
+}
+
+/*
+ * DAMON callback registered as ctx->callback.after_wmarks_check.  It only
+ * handles a pending 'commit_inputs' request and returns that result.
+ */
+static int damon_acma_after_wmarks_check(struct damon_ctx *c)
+{
+ return damon_acma_handle_commit_inputs();
+}
+
+/*
+ * Module init: create the paddr context and target, register the callbacks,
+ * and start DAMON_ACMA right away if 'enabled' was already set.
+ */
+static int __init damon_acma_init(void)
+{
+	int err;
+
+	err = damon_modules_new_paddr_ctx_target(&ctx, &target);
+	if (err)
+		return err;
+
+	ctx->callback.after_aggregation = damon_acma_after_aggregation;
+	ctx->callback.after_wmarks_check = damon_acma_after_wmarks_check;
+
+	/* 'enabled' has set before this function, probably via command line */
+	if (!enabled)
+		return 0;
+	return damon_acma_turn(true);
+}
+
+module_init(damon_acma_init);
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [RFC IDEA v2 3/6] mm/page_reporting: implement a function for reporting specific pfn range
2024-05-12 19:36 [RFC IDEA v2 0/6] mm/damon: introduce Access/Contiguity-aware Memory Auto-scaling (ACMA) SeongJae Park
2024-05-12 19:36 ` [RFC IDEA v2 1/6] mm/damon: implement DAMOS actions for access-aware contiguous memory allocation SeongJae Park
2024-05-12 19:36 ` [RFC IDEA v2 2/6] mm/damon: add the initial part of access/contiguity-aware memory auto-scaling module SeongJae Park
@ 2024-05-12 19:36 ` SeongJae Park
2024-05-12 19:36 ` [RFC IDEA v2 4/6] mm/damon/acma: implement scale down feature SeongJae Park
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: SeongJae Park @ 2024-05-12 19:36 UTC (permalink / raw)
Cc: SeongJae Park, damon, linux-mm, linux-kernel
Implement a function for reporting pages of specific pfn range, for
non-free pages reporting use case. The use case will be implemented by
following commits.
Signed-off-by: SeongJae Park <sj@kernel.org>
---
mm/page_reporting.c | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index e4c428e61d8c..e14f2e979f16 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -349,6 +349,33 @@ static void page_reporting_process(struct work_struct *work)
static DEFINE_MUTEX(page_reporting_mutex);
DEFINE_STATIC_KEY_FALSE(page_reporting_enabled);
+#ifdef CONFIG_ACMA
+
+/*
+ * Report the @nr_pages pages starting at @pfn to the registered page reporting
+ * device, as a single-entry scatterlist.
+ *
+ * Returns -ENOENT if no device having a report callback is registered,
+ * otherwise the return value of the device's report callback.
+ *
+ * NOTE(review): the callback is invoked inside the RCU read-side critical
+ * section; confirm that registered ->report() implementations do not sleep.
+ * NOTE(review): the scatterlist length is an unsigned int, so
+ * nr_pages << PAGE_SHIFT has to fit in it -- confirm callers respect this.
+ */
+int page_report(unsigned long pfn, unsigned long nr_pages)
+{
+	struct page_reporting_dev_info *prdev;
+	struct scatterlist sgl;
+	int err;
+
+	rcu_read_lock();
+
+	prdev = rcu_dereference(pr_dev_info);
+	if (!prdev || !prdev->report) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
+
+	sg_init_table(&sgl, 1);
+	/* no struct page is attached; the range is passed via dma_address */
+	sg_set_page(&sgl, NULL, nr_pages << PAGE_SHIFT, 0);
+	sgl.dma_address = PFN_PHYS(pfn);
+
+	/* the callback takes a pointer to the scatterlist, not a copy */
+	err = prdev->report(prdev, &sgl, 1);
+	rcu_read_unlock();
+	return err;
+}
+
+#endif
+
int page_reporting_register(struct page_reporting_dev_info *prdev)
{
int err = 0;
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [RFC IDEA v2 4/6] mm/damon/acma: implement scale down feature
2024-05-12 19:36 [RFC IDEA v2 0/6] mm/damon: introduce Access/Contiguity-aware Memory Auto-scaling (ACMA) SeongJae Park
` (2 preceding siblings ...)
2024-05-12 19:36 ` [RFC IDEA v2 3/6] mm/page_reporting: implement a function for reporting specific pfn range SeongJae Park
@ 2024-05-12 19:36 ` SeongJae Park
2024-05-12 19:36 ` [RFC IDEA v2 5/6] mm/damon/acma: implement scale up feature SeongJae Park
2024-05-12 19:36 ` [RFC IDEA v2 6/6] drivers/virtio/virtio_balloon: integrate ACMA and ballooning SeongJae Park
5 siblings, 0 replies; 7+ messages in thread
From: SeongJae Park @ 2024-05-12 19:36 UTC (permalink / raw)
Cc: SeongJae Park, damon, linux-mm, linux-kernel
Implement the memory scale down feature of Access/Contiguity-aware
Memory Auto-scaling module. It runs DAMOS_ALLOC action scheme with
user-acceptable level of memory pressure stall as its target, in 512
contig pages base granularity, and report the 512 contig successfully
DAMOS_ALLOC-ed regions to the host for reuse. For minimizing
DAMON-internal DAMOS_ALLOC-ed region management overhead, the scheme is
applied to only 128 MiB not-completely-allocated contiguous memory
region of highest address.
Signed-off-by: SeongJae Park <sj@kernel.org>
---
mm/damon/acma.c | 137 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 131 insertions(+), 6 deletions(-)
diff --git a/mm/damon/acma.c b/mm/damon/acma.c
index 276b61fd4e26..b093b90471dd 100644
--- a/mm/damon/acma.c
+++ b/mm/damon/acma.c
@@ -8,9 +8,6 @@
* and report it to the host when the system is having memory pressure level
* under the threshold, and vice versa, respectively.
*
- * At this moment, the scaling is not implemented, hence this is just a memory
- * pressure-aware proactive reclamation module.
- *
* Author: SeongJae Park <sj@kernel.org>
*/
@@ -50,6 +47,13 @@ static bool enabled __read_mostly;
static bool commit_inputs __read_mostly;
module_param(commit_inputs, bool, 0600);
+/*
+ * Minimum amount of memory to be guaranteed to the system, in KiB.  In other
+ * words, the lower limit of the scaling.
+ */
+static unsigned long min_mem_kb __read_mostly;
+module_param(min_mem_kb, ulong, 0600);
+
/*
* Desired level of memory pressure-stall time in microseconds.
*
@@ -66,6 +70,18 @@ module_param(commit_inputs, bool, 0600);
static unsigned long quota_mem_pressure_us __read_mostly = 1000;
module_param(quota_mem_pressure_us, ulong, 0600);
+/*
+ * Basic scale down/up granularity. ACMA will allocate and report contiguous
+ * pages of this size at once. 512 pages (2 MiB for 4 KiB page setup) by
+ * default.
+ *
+ * To minimize DAMON-internal ALLOC-ed memory management overhead, we further
+ * apply SCALE_WINDOW. Refer to damon_acma_set_scale_down_region_filter() for
+ * more detail about it.
+ */
+static unsigned int scale_pg_order __read_mostly = 9;
+module_param(scale_pg_order, uint, 0600);
+
static struct damos_quota damon_acma_quota = {
/* Use up to 15 ms per 1 sec for scaling, by default */
.ms = 15,
@@ -123,6 +139,11 @@ DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_acma_reclaim_stat,
acma_reclaim_tried_regions, acma_reclaim_succ_regions,
acma_reclaim_quota_exceeds);
+static struct damos_stat damon_acma_scale_down_stat;
+DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_acma_scale_down_stat,
+ acma_scale_down_tried_regions, acma_scale_down_succ_regions,
+ acma_scale_down_quota_exceeds);
+
static struct damos_access_pattern damon_acma_stub_pattern = {
/* Find regions having PAGE_SIZE or larger size */
.min_sz_region = PAGE_SIZE,
@@ -143,6 +164,9 @@ static struct damos *damon_acma_new_scheme(
{
struct damos_quota quota = damon_acma_quota;
+ /* Use 1/2 of total quota for hot/cold pages sorting */
+ quota.ms = quota.ms / 2;
+
return damon_new_scheme(
pattern,
action,
@@ -177,6 +201,61 @@ static int damon_acma_set_scheme_quota(struct damos *scheme, struct damos *old,
return 0;
}
+/*
+ * scale_pg_order is for basic scaling granularity. Have a larger granularity
+ * to limit DAMON-internal alloc-ed pages management overhead.
+ */
+#define SCALE_WINDOW (128 * MB)
+
+/*
+ * Set scale_down scheme's address range type filter to apply scaling down to
+ * only current scaling window.  Scaling window is SCALE_WINDOW size contiguous
+ * memory region of highest address that not yet completely DAMOS_ALLOC-ed and
+ * reported.
+ *
+ * Returns 0 on success, or -ENOMEM if the filter cannot be allocated.
+ *
+ * TODO: Apply 'struct page' reduction in SCALE_WINDOW or lower granularity.
+ * E.g., hot-unplug the memory block, or apply vmemmap remapping-based approach
+ * like hugetlb vmemmap optimization
+ * (https://docs.kernel.org/mm/vmemmap_dedup.html).
+ *
+ * NOTE(review): damon_alloced_bytes() is declared in include/linux/damon.h as
+ * taking no argument, but is called with an address range here -- the
+ * prototype and this call need to be reconciled.
+ */
+static int damon_acma_set_scale_down_region_filter(struct damos *scheme)
+{
+	struct damos_filter *filter = damos_new_filter(
+			DAMOS_FILTER_TYPE_ADDR, false);
+	unsigned long end, window_start;
+	unsigned long start_limit, end_limit;
+
+	if (!filter)
+		return -ENOMEM;
+
+	/* do not scale down below min_mem_kb */
+	end_limit = monitor_region_end;
+	start_limit = monitor_region_start + min_mem_kb * KB;
+
+	/* not-completely-alloc-ed SCALE_WINDOW region of highest address */
+	for (end = end_limit; end >= start_limit + SCALE_WINDOW;
+			end -= SCALE_WINDOW) {
+		if (damon_alloced_bytes(end, end - SCALE_WINDOW)
+				!= SCALE_WINDOW)
+			break;
+	}
+	/*
+	 * When the loop ran to its end, 'end' can be smaller than
+	 * SCALE_WINDOW.  Avoid the unsigned underflow, which would result in a
+	 * window start far above the window end.
+	 */
+	window_start = end > SCALE_WINDOW ? end - SCALE_WINDOW : 0;
+	filter->addr_range.start = max(start_limit, window_start);
+	filter->addr_range.end = end;
+
+	damos_add_filter(scheme, filter);
+	return 0;
+}
+
+/*
+ * Called back from DAMOS for every damos->alloc_order contig pages that
+ * just successfully DAMOS_ALLOC-ed.  Reports the 2^scale_pg_order pages
+ * starting at @start_addr (a physical address) via page_report().
+ *
+ * Returns the return value of page_report().
+ */
+static int damon_acma_alloc_callback(unsigned long start_addr)
+{
+	/* For non-zero return value, DAMOS free the pages. */
+	return page_report(PHYS_PFN(start_addr), 1 << scale_pg_order);
+}
+
/*
* Reclaim cold pages on entire physical address space
*/
@@ -199,10 +278,40 @@ static struct damos *damon_acma_new_reclaim_scheme(struct damos *old)
return scheme;
}
+/*
+ * Scale down scheme
+ *
+ * Build a DAMOS_ALLOC scheme for the stub pattern, auto-tuned with the "some"
+ * memory PSI quota goal, that allocates in 2^scale_pg_order pages granularity,
+ * reports each allocated chunk through damon_acma_alloc_callback(), and is
+ * restricted to the current scaling window by an address filter.  @old, if not
+ * NULL, donates its quota charging status.
+ *
+ * Returns the new scheme, or NULL on failure.
+ */
+static struct damos *damon_acma_new_scale_down_scheme(struct damos *old)
+{
+	struct damos_access_pattern pattern = damon_acma_stub_pattern;
+	struct damos *scheme;
+	int err;
+
+	scheme = damon_acma_new_scheme(&pattern, DAMOS_ALLOC);
+	if (!scheme)
+		return NULL;
+	err = damon_acma_set_scheme_quota(scheme, old,
+			DAMOS_QUOTA_SOME_MEM_PSI_US);
+	if (err) {
+		damon_destroy_scheme(scheme);
+		return NULL;
+	}
+	/* alloc in 2^scale_pg_order (512 by default) pages granularity */
+	scheme->alloc_order = scale_pg_order;
+	scheme->alloc_callback = damon_acma_alloc_callback;
+	err = damon_acma_set_scale_down_region_filter(scheme);
+	if (err) {
+		damon_destroy_scheme(scheme);
+		return NULL;
+	}
+	return scheme;
+}
+
static int damon_acma_apply_parameters(void)
{
struct damos *scheme, *reclaim_scheme;
- struct damos *old_reclaim_scheme = NULL;
+ struct damos *scale_down_scheme;
+ struct damos *old_reclaim_scheme = NULL, *old_scale_down_scheme = NULL;
struct damos_quota_goal *goal;
int err = 0;
@@ -210,14 +319,27 @@ static int damon_acma_apply_parameters(void)
if (err)
return err;
- damon_for_each_scheme(scheme, ctx)
- old_reclaim_scheme = scheme;
+ damon_for_each_scheme(scheme, ctx) {
+ if (!old_reclaim_scheme) {
+ old_reclaim_scheme = scheme;
+ continue;
+ }
+ old_scale_down_scheme = scheme;
+ }
reclaim_scheme = damon_acma_new_reclaim_scheme(old_reclaim_scheme);
if (!reclaim_scheme)
return -ENOMEM;
damon_set_schemes(ctx, &reclaim_scheme, 1);
+ scale_down_scheme = damon_acma_new_scale_down_scheme(
+ old_scale_down_scheme);
+ if (!scale_down_scheme) {
+ damon_destroy_scheme(reclaim_scheme);
+ return -ENOMEM;
+ }
+ damon_add_scheme(ctx, scale_down_scheme);
+
return damon_set_region_biggest_system_ram_default(target,
&monitor_region_start,
&monitor_region_end);
@@ -303,6 +425,9 @@ static int damon_acma_after_aggregation(struct damon_ctx *c)
case DAMOS_LRU_RECLAIM:
damon_acma_reclaim_stat = s->stat;
break;
+ case DAMOS_ALLOC:
+ damon_acma_scale_down_stat = s->stat;
+ break;
default:
break;
}
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [RFC IDEA v2 5/6] mm/damon/acma: implement scale up feature
2024-05-12 19:36 [RFC IDEA v2 0/6] mm/damon: introduce Access/Contiguity-aware Memory Auto-scaling (ACMA) SeongJae Park
` (3 preceding siblings ...)
2024-05-12 19:36 ` [RFC IDEA v2 4/6] mm/damon/acma: implement scale down feature SeongJae Park
@ 2024-05-12 19:36 ` SeongJae Park
2024-05-12 19:36 ` [RFC IDEA v2 6/6] drivers/virtio/virtio_balloon: integrate ACMA and ballooning SeongJae Park
5 siblings, 0 replies; 7+ messages in thread
From: SeongJae Park @ 2024-05-12 19:36 UTC (permalink / raw)
Cc: SeongJae Park, damon, linux-mm, linux-kernel
Implement the memory scale up feature of Access/Contiguity-aware Memory
Auto-scaling module. It runs DAMOS_FREE action scheme with
user-acceptable level of memory pressure stall as its target, in 512
contig pages base granularity. For minimizing DAMON-internal
DAMOS_ALLOC-ed region management overhead, the scheme is applied to only
128 MiB not-completely-allocated contiguous memory region of lowest
address.
Note that it does nothing with free_callback() since the host will
notify in-guest use of the page via page fault and return it on demand.
Signed-off-by: SeongJae Park <sj@kernel.org>
---
mm/damon/acma.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 90 insertions(+), 4 deletions(-)
diff --git a/mm/damon/acma.c b/mm/damon/acma.c
index b093b90471dd..32827cbf2fa7 100644
--- a/mm/damon/acma.c
+++ b/mm/damon/acma.c
@@ -54,6 +54,13 @@ module_param(commit_inputs, bool, 0600);
static unsigned long min_mem_kb __read_mostly;
module_param(min_mem, ulong, 0600);
+/*
+ * Maximum amount of memory to be allowed for the system, in KiB.  In other
+ * words, the upper limit of the scaling.
+ */
+static unsigned long max_mem_kb __read_mostly;
+module_param(max_mem_kb, ulong, 0600);
+
/*
* Desired level of memory pressure-stall time in microseconds.
*
@@ -144,6 +151,11 @@ DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_acma_scale_down_stat,
acma_scale_down_tried_regions, acma_scale_down_succ_regions,
acma_scale_down_quota_exceeds);
+static struct damos_stat damon_acma_scale_up_stat;
+DEFINE_DAMON_MODULES_DAMOS_STATS_PARAMS(damon_acma_scale_up_stat,
+ acma_scale_up_tried_regions, acma_scale_up_succ_regions,
+ acma_scale_up_quota_exceeds);
+
static struct damos_access_pattern damon_acma_stub_pattern = {
/* Find regions having PAGE_SIZE or larger size */
.min_sz_region = PAGE_SIZE,
@@ -164,8 +176,8 @@ static struct damos *damon_acma_new_scheme(
{
struct damos_quota quota = damon_acma_quota;
- /* Use 1/2 of total quota for hot/cold pages sorting */
- quota.ms = quota.ms / 2;
+ /* Use 1/3 of total quota for hot/cold pages sorting */
+ quota.ms = quota.ms / 3;
return damon_new_scheme(
pattern,
@@ -246,6 +258,36 @@ static int damon_acma_set_scale_down_region_filter(struct damos *scheme)
return 0;
}
+/*
+ * Similar to damon_acma_set_scale_down_region_filter() but for scaling up.
+ *
+ * Adds an address range filter to @scheme for the scale up window: the
+ * not-completely-free-ed SCALE_WINDOW region of lowest address, searched
+ * from monitor_region_start and bounded by max_mem_kb.
+ *
+ * Returns 0 on success, or -ENOMEM if damos_new_filter() fails.
+ */
+static int damon_acma_set_scale_up_region_filter(struct damos *scheme)
+{
+	struct damos_filter *filter = damos_new_filter(
+			DAMOS_FILTER_TYPE_ADDR, false);
+	unsigned long start;
+	unsigned long start_limit, end_limit;
+
+	if (!filter)
+		return -ENOMEM;
+
+	/* do not scale up above max_mem_kb */
+	start_limit = monitor_region_start;
+	end_limit = start_limit + max_mem_kb * KB;
+
+	/*
+	 * Find the not-completely-free-ed SCALE_WINDOW region of lowest
+	 * address. The bound is written as 'start + SCALE_WINDOW <= end_limit'
+	 * rather than 'start <= end_limit - SCALE_WINDOW' so that the unsigned
+	 * subtraction cannot wrap when the limit is smaller than one window.
+	 */
+	for (start = start_limit; start + SCALE_WINDOW <= end_limit;
+			start += SCALE_WINDOW) {
+		/*
+		 * NOTE(review): the start address of the window is passed,
+		 * keeping the single-argument form of the original call;
+		 * confirm against the prototype of damon_alloced_bytes().
+		 */
+		if (damon_alloced_bytes(start))
+			break;
+	}
+	filter->addr_range.start = start;
+	filter->addr_range.end = min(end_limit, start + SCALE_WINDOW);
+
+	damos_add_filter(scheme, filter);
+	return 0;
+}
+
/*
* Called back from DAMOS for every damos->alloc_order contig pages that
* just successfully DAMOS_ALLOC-ed.
@@ -307,11 +349,40 @@ static struct damos *damon_acma_new_scale_down_scheme(struct damos *old)
return scheme;
}
+/*
+ * Scale up scheme
+ *
+ * Builds a new DAMOS_FREE scheme for scaling up. @old is the scale up scheme
+ * that is currently installed, or NULL if there is none; it is handed to
+ * damon_acma_set_scheme_quota() together with the new scheme.
+ *
+ * Returns the new scheme, or NULL if any setup step fails. On failure the
+ * partially set up scheme is destroyed before returning.
+ */
+static struct damos *damon_acma_new_scale_up_scheme(struct damos *old)
+{
+	struct damos_access_pattern pattern = damon_acma_stub_pattern;
+	struct damos *scheme;
+	int err;
+
+	scheme = damon_acma_new_scheme(&pattern, DAMOS_FREE);
+	if (!scheme)
+		return NULL;
+	err = damon_acma_set_scheme_quota(scheme, old,
+			DAMOS_QUOTA_SOME_MEM_PSI_US);
+	if (err) {
+		damon_destroy_scheme(scheme);
+		return NULL;
+	}
+	scheme->alloc_order = scale_pg_order;
+	/* no callback is installed for the scale up scheme */
+	scheme->alloc_callback = NULL;
+	err = damon_acma_set_scale_up_region_filter(scheme);
+	if (err) {
+		damon_destroy_scheme(scheme);
+		return NULL;
+	}
+	return scheme;
+}
+
static int damon_acma_apply_parameters(void)
{
struct damos *scheme, *reclaim_scheme;
- struct damos *scale_down_scheme;
+ struct damos *scale_down_scheme, *scale_up_scheme;
struct damos *old_reclaim_scheme = NULL, *old_scale_down_scheme = NULL;
+ struct damos *old_scale_up_scheme = NULL;
struct damos_quota_goal *goal;
int err = 0;
@@ -324,7 +395,11 @@ static int damon_acma_apply_parameters(void)
old_reclaim_scheme = scheme;
continue;
}
- old_scale_down_scheme = scheme;
+ if (!old_scale_down_scheme) {
+ old_scale_down_scheme = scheme;
+ continue;
+ }
+ old_scale_up_scheme = scheme;
}
reclaim_scheme = damon_acma_new_reclaim_scheme(old_reclaim_scheme);
@@ -340,6 +415,14 @@ static int damon_acma_apply_parameters(void)
}
damon_add_scheme(ctx, scale_down_scheme);
+ scale_up_scheme = damon_acma_new_scale_up_scheme(old_scale_up_scheme);
+ if (!scale_up_scheme) {
+ damon_destroy_scheme(scale_down_scheme);
+ damon_destroy_scheme(reclaim_scheme);
+ return -ENOMEM;
+ }
+ damon_add_scheme(ctx, scale_up_scheme);
+
return damon_set_region_biggest_system_ram_default(target,
&monitor_region_start,
&monitor_region_end);
@@ -428,6 +511,9 @@ static int damon_acma_after_aggregation(struct damon_ctx *c)
case DAMOS_ALLOC:
damon_acma_scale_down_stat = s->stat;
break;
+ case DAMOS_FREE:
+ damon_acma_scale_up_stat = s->stat;
+ break;
default:
break;
}
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [RFC IDEA v2 6/6] drivers/virtio/virtio_balloon: integrate ACMA and ballooning
2024-05-12 19:36 [RFC IDEA v2 0/6] mm/damon: introduce Access/Contiguity-aware Memory Auto-scaling (ACMA) SeongJae Park
` (4 preceding siblings ...)
2024-05-12 19:36 ` [RFC IDEA v2 5/6] mm/damon/acma: implement scale up feature SeongJae Park
@ 2024-05-12 19:36 ` SeongJae Park
5 siblings, 0 replies; 7+ messages in thread
From: SeongJae Park @ 2024-05-12 19:36 UTC (permalink / raw)
Cc: SeongJae Park, Michael S. Tsirkin, David Hildenbrand, Jason Wang,
Xuan Zhuo, virtualization, linux-kernel, damon, linux-mm
Let the host effectively inflate the balloon in an access/contiguity-aware
way when the guest kernel is compiled with a specific kernel config
(CONFIG_ACMA_BALLOON). When the config is enabled and the host requests a
balloon size change, virtio-balloon adjusts ACMA's max-mem parameter instead
of allocating guest pages and putting them into the balloon. As a result,
the host can use the requested amount of guest memory, so from the host's
perspective, the ballooning just works, but in a transparent and
access/contiguity-aware way.
Signed-off-by: SeongJae Park <sj@kernel.org>
---
drivers/virtio/virtio_balloon.c | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1f5b3dd31fcf..a954d75789ae 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -472,6 +472,32 @@ static void virtballoon_changed(struct virtio_device *vdev)
struct virtio_balloon *vb = vdev->priv;
unsigned long flags;
+#ifdef CONFIG_ACMA_BALLOON
+ s64 target;
+ u32 num_pages;
+
+
+ /* Legacy balloon config space is LE, unlike all other devices. */
+ virtio_cread_le(vb->vdev, struct virtio_balloon_config, num_pages,
+ &num_pages);
+
+ /*
+ * Aligned up to guest page size to avoid inflating and deflating
+ * balloon endlessly.
+ */
+ target = ALIGN(num_pages, VIRTIO_BALLOON_PAGES_PER_PAGE);
+
+ /*
+ * If the given new max mem size is larger than ACMA's current max mem
+ * size, this is the same as a normal max mem adjustment.
+ * If the given new max mem size is smaller than ACMA's current max mem
+ * size, strong aggressiveness is applied while the memory needed for
+ * meeting the new max mem is being stolen.
+ */
+ acma_set_max_mem_aggressive(totalram_pages() - target);
+ return;
+#endif
+
spin_lock_irqsave(&vb->stop_update_lock, flags);
if (!vb->stop_update) {
start_update_balloon_size(vb);
--
2.39.2
^ permalink raw reply related [flat|nested] 7+ messages in thread