Re: [RFC PATCH v1 3/3] mm/damon/reclaim: Add per NUMA node proactive reclamation by DAMON_RECLAIM.

All of lore.kernel.org
 help / color / mirror / Atom feed

From: SeongJae Park <sj@kernel.org>
To: Jonghyeon Kim <tome01@ajou.ac.kr>
Cc: akpm@linux-foundation.org, Jonathan.Cameron@Huawei.com,
	amit@kernel.org, benh@kernel.crashing.org, corbet@lwn.net,
	david@redhat.com, dwmw@amazon.com, elver@google.com,
	foersleo@amazon.de, gthelen@google.com, markubo@amazon.de,
	rientjes@google.com, shakeelb@google.com, shuah@kernel.org,
	linux-damon@amazon.com, linux-mm@kvack.org,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [RFC PATCH v1 3/3] mm/damon/reclaim: Add per NUMA node proactive reclamation by DAMON_RECLAIM.
Date: Tue, 22 Feb 2022 09:53:38 +0000	[thread overview]
Message-ID: <20220222095338.8013-1-sj@kernel.org> (raw)
In-Reply-To: <20220218102611.31895-4-tome01@ajou.ac.kr>

On Fri, 18 Feb 2022 19:26:11 +0900 Jonghyeon Kim <tome01@ajou.ac.kr> wrote:

> To add DAMON_RECLAIM worker threads(kdamond) that do proactive
> reclamation per NUMA node, each node must have its own context.
> 'per_node' is added to enable it.
> 
> If 'per_node' is true, kdamonds as online NUMA node will be waked up and
> start monitoring to proactively reclaim memory. If 'per_node' is false,
> only one kdamond thread will start monitoring for all system memory.
> 
> Signed-off-by: Jonghyeon Kim <tome01@ajou.ac.kr>
> ---
>  mm/damon/reclaim.c | 147 ++++++++++++++++++++++++++++++++-------------
>  1 file changed, 104 insertions(+), 43 deletions(-)
> 
> diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
> index b53d9c22fad1..85e8f97dd599 100644
> --- a/mm/damon/reclaim.c
> +++ b/mm/damon/reclaim.c
> @@ -177,13 +177,27 @@ static unsigned long monitor_region_end __read_mostly;
>  module_param(monitor_region_end, ulong, 0600);
>  
>  /*
> - * PID of the DAMON thread
> + * Enable monitoring memory regions per NUMA node.
>   *
> - * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread.
> + * By default, watermarks consist of based on total system memory.
> + */
> +static bool per_node __read_mostly;
> +module_param(per_node, bool, 0600);
> +
> +/*
> + * Number of currently running DAMON worker threads
> + */
> +static unsigned long nr_kdamond __read_mostly;
> +module_param(nr_kdamond, ulong, 0400);

I'd prefer to call this nr_kdamond*s*

> +
> +/*
> + * First PID of the DAMON threads
> + *
> + * If DAMON_RECLAIM is enabled, this becomes the first PID of the worker threads.
>   * Else, -1.
>   */
> -static int kdamond_pid __read_mostly = -1;
> -module_param(kdamond_pid, int, 0400);
> +static int kdamond_start_pid __read_mostly = -1;
> +module_param(kdamond_start_pid, int, 0400);

This change could break old users.  Let's keep the name as is and clarify the
fact that it's for only the first kdamond in the document.

As long as DAMON_RECLAIM works in the exclusive manner, users will still be
able to know all pids of kdamonds for DAMON_RECLAIM, as nr_kdamonds is also
provided. 

>  
>  /*
>   * Number of memory regions that tried to be reclaimed.
> @@ -215,8 +229,8 @@ module_param(bytes_reclaimed_regions, ulong, 0400);
>  static unsigned long nr_quota_exceeds __read_mostly;
>  module_param(nr_quota_exceeds, ulong, 0400);
>  
> -static struct damon_ctx *ctx;
> -static struct damon_target *target;
> +static struct damon_ctx *ctxs[MAX_NUMNODES];
> +static struct damon_target *targets[MAX_NUMNODES];
>  
>  struct damon_reclaim_ram_walk_arg {
>  	unsigned long start;
> @@ -251,7 +265,7 @@ static bool get_monitoring_region(unsigned long *start, unsigned long *end)
>  	return true;
>  }
>  
> -static struct damos *damon_reclaim_new_scheme(void)
> +static struct damos *damon_reclaim_new_scheme(int node)
>  {
>  	struct damos_watermarks wmarks = {
>  		.metric = DAMOS_WMARK_FREE_MEM_RATE,
> @@ -259,6 +273,7 @@ static struct damos *damon_reclaim_new_scheme(void)
>  		.high = wmarks_high,
>  		.mid = wmarks_mid,
>  		.low = wmarks_low,
> +		.node = node,
>  	};
>  	struct damos_quota quota = {
>  		/*
> @@ -290,56 +305,99 @@ static struct damos *damon_reclaim_new_scheme(void)
>  	return scheme;
>  }
>  
> -static int damon_reclaim_turn(bool on)
> +static int damon_reclaim_start(int nid)
>  {
>  	struct damon_region *region;
>  	struct damos *scheme;
>  	int err;
> +	unsigned long start, end;
>  
> -	if (!on) {
> -		err = damon_stop(&ctx, 1);
> -		if (!err)
> -			kdamond_pid = -1;
> -		return err;
> -	}
> -
> -	err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0,
> +	err = damon_set_attrs(ctxs[nid], sample_interval, aggr_interval, 0,
>  			min_nr_regions, max_nr_regions);
>  	if (err)
>  		return err;
>  
> -	if (monitor_region_start > monitor_region_end)
> -		return -EINVAL;
> -	if (!monitor_region_start && !monitor_region_end &&
> -			!get_monitoring_region(&monitor_region_start,
> -				&monitor_region_end))
> -		return -EINVAL;
> +	if (per_node) {
> +		monitor_region_start = monitor_region_end = 0;
> +
> +		start = PFN_PHYS(node_start_pfn(nid));
> +		end = PFN_PHYS(node_start_pfn(nid) + node_present_pages(nid) - 1);
> +		if (end <= start)
> +			return -EINVAL;
> +	} else {
> +		if (!monitor_region_start && !monitor_region_end &&
> +				!get_monitoring_region(&monitor_region_start,
> +					&monitor_region_end))
> +			return -EINVAL;
> +		start = monitor_region_start;
> +		end = monitor_region_end;
> +	}
> +
>  	/* DAMON will free this on its own when finish monitoring */
> -	region = damon_new_region(monitor_region_start, monitor_region_end);
> +	region = damon_new_region(start, end);
>  	if (!region)
>  		return -ENOMEM;
> -	damon_add_region(region, target);
> +	damon_add_region(region, targets[nid]);
>  
>  	/* Will be freed by 'damon_set_schemes()' below */
> -	scheme = damon_reclaim_new_scheme();
> +	scheme = damon_reclaim_new_scheme(nid);
>  	if (!scheme) {
>  		err = -ENOMEM;
>  		goto free_region_out;
>  	}
> -	err = damon_set_schemes(ctx, &scheme, 1);
> +
> +	err = damon_set_schemes(ctxs[nid], &scheme, 1);
>  	if (err)
>  		goto free_scheme_out;
>  
> -	err = damon_start(&ctx, 1);
> +	err = damon_start_one(ctxs[nid]);

This could surprise users assuming DAMON_RECLAIM would work in exclusive manner
as it was.

>  	if (!err) {
> -		kdamond_pid = ctx->kdamond->pid;
> +		if (kdamond_start_pid == -1)
> +			kdamond_start_pid = ctxs[nid]->kdamond->pid;
> +		nr_kdamond++;
>  		return 0;
>  	}
>  
>  free_scheme_out:
>  	damon_destroy_scheme(scheme);
>  free_region_out:
> -	damon_destroy_region(region, target);
> +	damon_destroy_region(region, targets[nid]);
> +
> +	return err;
> +}
> +
> +static int damon_reclaim_start_all(void)
> +{
> +	int nid, err;
> +
> +	if (!per_node)
> +		return damon_reclaim_start(0);
> +
> +	for_each_online_node(nid) {
> +		err = damon_reclaim_start(nid);
> +		if (err)
> +			break;

I'd prefer making contexts first and starting them at once in the exclusive
manner using damon_start().

> +	}
> +
> +	return err;
> +}
> +
> +static int damon_reclaim_turn(bool on)
> +{
> +	int err;
> +
> +	if (!on) {
> +		err = damon_stop(ctxs, nr_kdamond);
> +		if (!err) {
> +			kdamond_start_pid = -1;
> +			nr_kdamond = 0;
> +			monitor_region_start = 0;
> +			monitor_region_end = 0;
> +		}
> +		return err;
> +	}
> +
> +	err = damon_reclaim_start_all();
>  	return err;
>  }
>  
> @@ -380,21 +438,24 @@ static int damon_reclaim_after_aggregation(struct damon_ctx *c)
>  
>  static int __init damon_reclaim_init(void)
>  {
> -	ctx = damon_new_ctx();
> -	if (!ctx)
> -		return -ENOMEM;
> -
> -	if (damon_select_ops(ctx, DAMON_OPS_PADDR))
> -		return -EINVAL;
> -
> -	ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
> -
> -	target = damon_new_target();
> -	if (!target) {
> -		damon_destroy_ctx(ctx);
> -		return -ENOMEM;
> +	int nid;
> +
> +	for_each_node(nid) {
> +		ctxs[nid] = damon_new_ctx();
> +		if (!ctxs[nid])
> +			return -ENOMEM;
> +
> +		if (damon_select_ops(ctxs[nid], DAMON_OPS_PADDR))
> +			return -EINVAL;
> +		ctxs[nid]->callback.after_aggregation = damon_reclaim_after_aggregation;
> +
> +		targets[nid] = damon_new_target();
> +		if (!targets[nid]) {
> +			damon_destroy_ctx(ctxs[nid]);

Shouldn't we also destroy previously allocated contexts?

> +			return -ENOMEM;
> +		}
> +		damon_add_target(ctxs[nid], targets[nid]);
>  	}
> -	damon_add_target(ctx, target);
>  
>  	schedule_delayed_work(&damon_reclaim_timer, 0);
>  	return 0;
> -- 
> 2.17.1


Thanks,
SJ

next prev parent reply	other threads:[~2022-02-22  9:53 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-18 10:26 [RFC PATCH v1 0/3] Rebase DAMON_RECALIM for NUMA system Jonghyeon Kim
2022-02-18 10:26 ` [RFC PATCH v1 1/3] mm/damon: Rebase damos watermarks for NUMA systems Jonghyeon Kim
2022-02-22  9:53   ` SeongJae Park
     [not found]     ` <20220223051056.GA3502@swarm08>
2022-02-23  7:10       ` Jonghyeon Kim
2022-02-18 10:26 ` [RFC PATCH v1 2/3] mm/damon/core: Add damon_start_one() Jonghyeon Kim
2022-02-22  9:53   ` SeongJae Park
     [not found]     ` <20220223051113.GA3535@swarm08>
2022-02-23  7:11       ` Jonghyeon Kim
2022-02-18 10:26 ` [RFC PATCH v1 3/3] mm/damon/reclaim: Add per NUMA node proactive reclamation by DAMON_RECLAIM Jonghyeon Kim
2022-02-22  9:53   ` SeongJae Park [this message]
     [not found]     ` <20220223051127.GA3588@swarm08>
2022-02-23  7:12       ` Jonghyeon Kim
2022-02-22  9:53 ` [RFC PATCH v1 0/3] Rebase DAMON_RECALIM for NUMA system SeongJae Park
     [not found]   ` <20220223051146.GA4530@swarm08>
2022-02-23  7:12     ` Jonghyeon Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220222095338.8013-1-sj@kernel.org \
    --to=sj@kernel.org \
    --cc=Jonathan.Cameron@Huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=amit@kernel.org \
    --cc=benh@kernel.crashing.org \
    --cc=corbet@lwn.net \
    --cc=david@redhat.com \
    --cc=dwmw@amazon.com \
    --cc=elver@google.com \
    --cc=foersleo@amazon.de \
    --cc=gthelen@google.com \
    --cc=linux-damon@amazon.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=markubo@amazon.de \
    --cc=rientjes@google.com \
    --cc=shakeelb@google.com \
    --cc=shuah@kernel.org \
    --cc=tome01@ajou.ac.kr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.