linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: Huang Ying <ying.huang@intel.com>, linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Huang Ying <ying.huang@intel.com>, Wei Xu <weixugc@google.com>,
	Alistair Popple <apopple@nvidia.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>,
	Michal Hocko <mhocko@kernel.org>, Yang Shi <shy828301@gmail.com>,
	Rafael J Wysocki <rafael.j.wysocki@intel.com>
Subject: Re: [RFC 3/4] acpi, hmat: calculate abstract distance with HMAT
Date: Mon, 19 Jun 2023 21:53:42 +0530	[thread overview]
Message-ID: <87352nifox.fsf@linux.ibm.com> (raw)
In-Reply-To: <20230616070538.190042-4-ying.huang@intel.com>

Huang Ying <ying.huang@intel.com> writes:

> A memory tiering abstract distance calculation algorithm based on ACPI
> HMAT is implemented.  The basic idea is as follows.
>
> The performance attributes of system default DRAM nodes are recorded
> as the baseline, whose abstract distance is MEMTIER_ADISTANCE_DRAM.
> Then, the ratio of the abstract distance of a memory node (target) to
> MEMTIER_ADISTANCE_DRAM is scaled based on the ratio of the performance
> attributes of the node to that of the default DRAM nodes.
>
> Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Cc: Wei Xu <weixugc@google.com>
> Cc: Alistair Popple <apopple@nvidia.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dave Hansen <dave.hansen@intel.com>
> Cc: Davidlohr Bueso <dave@stgolabs.net>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Yang Shi <shy828301@gmail.com>
> Cc: Rafael J Wysocki <rafael.j.wysocki@intel.com>
> ---
>  drivers/acpi/numa/hmat.c     | 124 ++++++++++++++++++++++++++++++++++-
>  include/linux/memory-tiers.h |   2 +
>  mm/memory-tiers.c            |   2 +-
>  3 files changed, 126 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
> index 2dee0098f1a9..21e4deb581ad 100644
> --- a/drivers/acpi/numa/hmat.c
> +++ b/drivers/acpi/numa/hmat.c
> @@ -24,6 +24,7 @@
>  #include <linux/node.h>
>  #include <linux/sysfs.h>
>  #include <linux/dax.h>
> +#include <linux/memory-tiers.h>
>  
>  static u8 hmat_revision;
>  static int hmat_disable __initdata;
> @@ -759,6 +760,123 @@ static int hmat_callback(struct notifier_block *self,
>  	return NOTIFY_OK;
>  }
>  
> +static int hmat_adistance_disabled;
> +static struct node_hmem_attrs default_dram_attrs;
> +
> +static void dump_hmem_attrs(struct node_hmem_attrs *attrs)
> +{
> +	pr_cont("read_latency: %u, write_latency: %u, read_bandwidth: %u, write_bandwidth: %u\n",
> +		attrs->read_latency, attrs->write_latency,
> +		attrs->read_bandwidth, attrs->write_bandwidth);
> +}
> +
> +static void disable_hmat_adistance_algorithm(void)
> +{
> +	hmat_adistance_disabled = true;
> +}
> +
> +static int hmat_init_default_dram_attrs(void)
> +{
> +	struct memory_target *target;
> +	struct node_hmem_attrs *attrs;
> +	int nid, pxm;
> +	int nid_dram = NUMA_NO_NODE;
> +
> +	if (default_dram_attrs.read_latency +
> +	    default_dram_attrs.write_latency != 0)
> +		return 0;
> +
> +	if (!default_dram_type)
> +		return -EIO;
> +
> +	for_each_node_mask(nid, default_dram_type->nodes) {
> +		pxm = node_to_pxm(nid);
> +		target = find_mem_target(pxm);
> +		if (!target)
> +			continue;
> +		attrs = &target->hmem_attrs[1];
> +		if (nid_dram == NUMA_NO_NODE) {
> +			if (attrs->read_latency + attrs->write_latency == 0 ||
> +			    attrs->read_bandwidth + attrs->write_bandwidth == 0) {
> +				pr_info("hmat: invalid hmem attrs for default DRAM node: %d,\n",
> +					nid);
> +				pr_info("  ");
> +				dump_hmem_attrs(attrs);
> +				pr_info("  disable hmat based abstract distance algorithm.\n");
> +				disable_hmat_adistance_algorithm();
> +				return -EIO;
> +			}
> +			nid_dram = nid;
> +			default_dram_attrs = *attrs;
> +			continue;
> +		}
> +		if (abs(attrs->read_latency - default_dram_attrs.read_latency) * 10 >
> +		    default_dram_attrs.read_latency ||
> +		    abs(attrs->write_latency - default_dram_attrs.write_latency) * 10 >
> +		    default_dram_attrs.write_latency ||
> +		    abs(attrs->read_bandwidth - default_dram_attrs.read_bandwidth) * 10 >
> +		    default_dram_attrs.read_bandwidth) {
> +			pr_info("hmat: hmem attrs for DRAM nodes mismatch.\n");
> +			pr_info("  node %d:", nid_dram);
> +			dump_hmem_attrs(&default_dram_attrs);
> +			pr_info("  node %d:", nid);
> +			dump_hmem_attrs(attrs);
> +			pr_info("  disable hmat based abstract distance algorithm.\n");
> +			disable_hmat_adistance_algorithm();
> +			return -EIO;
> +		}

What is this check about? What is the significance of 10? Can you add
the details as a code comment?


> +	}
> +
> +	return 0;
> +}
> +
> +static int hmat_calculate_adistance(struct notifier_block *self,
> +				    unsigned long nid, void *data)
> +{
> +	static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
> +	struct memory_target *target;
> +	struct node_hmem_attrs *attrs;
> +	int *adist = data;
> +	int pxm;
> +
> +	if (hmat_adistance_disabled)
> +		return NOTIFY_OK;
> +
> +	pxm = node_to_pxm(nid);
> +	target = find_mem_target(pxm);
> +	if (!target)
> +		return NOTIFY_OK;
> +
> +	if (hmat_init_default_dram_attrs())
> +		return NOTIFY_OK;
> +
> +	mutex_lock(&target_lock);
> +	hmat_update_target_attrs(target, p_nodes, 1);
> +	mutex_unlock(&target_lock);
> +
> +	attrs = &target->hmem_attrs[1];
> +
> +	if (attrs->read_latency + attrs->write_latency == 0 ||
> +	    attrs->read_bandwidth + attrs->write_bandwidth == 0)
> +		return NOTIFY_OK;
> +
> +	*adist = MEMTIER_ADISTANCE_DRAM *
> +		(attrs->read_latency + attrs->write_latency) /
> +		(default_dram_attrs.read_latency +
> +		 default_dram_attrs.write_latency) *
> +		(default_dram_attrs.read_bandwidth +
> +		 default_dram_attrs.write_bandwidth) /
> +		(attrs->read_bandwidth + attrs->write_bandwidth);


Can you write a comment describing how we use all these attributes in
deriving the abstract distance value?


> +
> +	return NOTIFY_STOP;
> +}
> +
> +static __meminitdata struct notifier_block hmat_adist_nb =
> +{
> +	.notifier_call = hmat_calculate_adistance,
> +	.priority = 100,
> +};
> +
>  static __init void hmat_free_structures(void)
>  {
>  	struct memory_target *target, *tnext;
> @@ -801,6 +919,7 @@ static __init int hmat_init(void)
>  	struct acpi_table_header *tbl;
>  	enum acpi_hmat_type i;
>  	acpi_status status;
> +	int usage;
>  
>  	if (srat_disabled() || hmat_disable)
>  		return 0;
> @@ -841,8 +960,11 @@ static __init int hmat_init(void)
>  	hmat_register_targets();
>  
>  	/* Keep the table and structures if the notifier may use them */
> -	if (!hotplug_memory_notifier(hmat_callback, HMAT_CALLBACK_PRI))
> +	usage = !hotplug_memory_notifier(hmat_callback, HMAT_CALLBACK_PRI);
> +	usage += !register_mt_adistance_algorithm(&hmat_adist_nb);
> +	if (usage)
>  		return 0;
> +
>  out_put:
>  	hmat_free_structures();
>  	acpi_put_table(tbl);
> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
> index c6429e624244..9377239c8d34 100644
> --- a/include/linux/memory-tiers.h
> +++ b/include/linux/memory-tiers.h
> @@ -33,6 +33,7 @@ struct memory_dev_type {
>  
>  #ifdef CONFIG_NUMA
>  extern bool numa_demotion_enabled;
> +extern struct memory_dev_type *default_dram_type;
>  struct memory_dev_type *alloc_memory_type(int adistance);
>  void destroy_memory_type(struct memory_dev_type *memtype);
>  void init_node_memory_type(int node, struct memory_dev_type *default_type);
> @@ -64,6 +65,7 @@ static inline bool node_is_toptier(int node)
>  #else
>  
>  #define numa_demotion_enabled	false
> +#define default_dram_type	NULL
>  /*
>   * CONFIG_NUMA implementation returns non NULL error.
>   */
> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
> index fb5398e710cc..3aabc7240402 100644
> --- a/mm/memory-tiers.c
> +++ b/mm/memory-tiers.c
> @@ -37,7 +37,7 @@ struct node_memory_type_map {
>  static DEFINE_MUTEX(memory_tier_lock);
>  static LIST_HEAD(memory_tiers);
>  static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
> -static struct memory_dev_type *default_dram_type;
> +struct memory_dev_type *default_dram_type;
>  
>  static struct bus_type memory_tier_subsys = {
>  	.name = "memory_tiering",
> -- 
> 2.39.2

  reply	other threads:[~2023-06-19 16:24 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-16  7:05 [RFC 0/4] memory tiering: calculate abstract distance based on ACPI HMAT Huang Ying
2023-06-16  7:05 ` [RFC 1/4] memory tiering: add abstract distance calculation algorithms management Huang Ying
2023-06-16  7:05 ` [RFC 2/4] acpi, hmat: refactor hmat_register_target_initiators() Huang Ying
2023-06-16  7:05 ` [RFC 3/4] acpi, hmat: calculate abstract distance with HMAT Huang Ying
2023-06-19 16:23   ` Aneesh Kumar K.V [this message]
2023-06-20  5:01     ` Huang, Ying
2023-06-16  7:05 ` [RFC 4/4] dax, kmem: calculate abstract distance with general interface Huang Ying

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87352nifox.fsf@linux.ibm.com \
    --to=aneesh.kumar@linux.ibm.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=dave@stgolabs.net \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=rafael.j.wysocki@intel.com \
    --cc=shy828301@gmail.com \
    --cc=weixugc@google.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).