All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aneesh Kumar K V <aneesh.kumar@linux.ibm.com>
To: "Huang, Ying" <ying.huang@intel.com>
Cc: linux-mm@kvack.org, akpm@linux-foundation.org,
	Wei Xu <weixugc@google.com>, Yang Shi <shy828301@gmail.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	Tim C Chen <tim.c.chen@intel.com>,
	Michal Hocko <mhocko@kernel.org>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Hesham Almatary <hesham.almatary@huawei.com>,
	Dave Hansen <dave.hansen@intel.com>,
	Jonathan Cameron <Jonathan.Cameron@huawei.com>,
	Alistair Popple <apopple@nvidia.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	jvgediya.oss@gmail.com
Subject: Re: [PATCH v11 8/8] mm/demotion: Update node_is_toptier to work with memory tiers
Date: Fri, 29 Jul 2022 12:17:45 +0530	[thread overview]
Message-ID: <e213cd43-e451-e837-b591-08e3400dd22e@linux.ibm.com> (raw)
In-Reply-To: <9fa09da8-eff7-e39a-96b0-2bc51711f08f@linux.ibm.com>

On 7/29/22 12:11 PM, Aneesh Kumar K V wrote:
> On 7/29/22 12:09 PM, Huang, Ying wrote:
>> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>>
>>> With memory tiers support we can have memory only NUMA nodes
>>> in the top tier from which we want to avoid promotion tracking NUMA
>>> faults. Update node_is_toptier to work with memory tiers.
>>> All NUMA nodes are by default top tier nodes. With lower memory
>>> tiers added we consider all memory tiers above a memory tier having
>>> CPU NUMA nodes as a top memory tier
>>>
>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>>> ---
>>>  include/linux/memory-tiers.h | 11 ++++++++++
>>>  include/linux/node.h         |  5 -----
>>>  mm/huge_memory.c             |  1 +
>>>  mm/memory-tiers.c            | 42 ++++++++++++++++++++++++++++++++++++
>>>  mm/migrate.c                 |  1 +
>>>  mm/mprotect.c                |  1 +
>>>  6 files changed, 56 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
>>> index f8dbeda617a7..bc9fb9d39b2c 100644
>>> --- a/include/linux/memory-tiers.h
>>> +++ b/include/linux/memory-tiers.h
>>> @@ -35,6 +35,7 @@ struct memory_dev_type *init_node_memory_type(int node, struct memory_dev_type *
>>>  #ifdef CONFIG_MIGRATION
>>>  int next_demotion_node(int node);
>>>  void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
>>> +bool node_is_toptier(int node);
>>>  #else
>>>  static inline int next_demotion_node(int node)
>>>  {
>>> @@ -45,6 +46,11 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target
>>>  {
>>>  	*targets = NODE_MASK_NONE;
>>>  }
>>> +
>>> +static inline bool node_is_toptier(int node)
>>> +{
>>> +	return true;
>>> +}
>>>  #endif
>>>  
>>>  #else
>>> @@ -64,5 +70,10 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target
>>>  {
>>>  	*targets = NODE_MASK_NONE;
>>>  }
>>> +
>>> +static inline bool node_is_toptier(int node)
>>> +{
>>> +	return true;
>>> +}
>>>  #endif	/* CONFIG_NUMA */
>>>  #endif  /* _LINUX_MEMORY_TIERS_H */
>>> diff --git a/include/linux/node.h b/include/linux/node.h
>>> index 40d641a8bfb0..9ec680dd607f 100644
>>> --- a/include/linux/node.h
>>> +++ b/include/linux/node.h
>>> @@ -185,9 +185,4 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
>>>  
>>>  #define to_node(device) container_of(device, struct node, dev)
>>>  
>>> -static inline bool node_is_toptier(int node)
>>> -{
>>> -	return node_state(node, N_CPU);
>>> -}
>>> -
>>>  #endif /* _LINUX_NODE_H_ */
>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>> index 834f288b3769..8405662646e9 100644
>>> --- a/mm/huge_memory.c
>>> +++ b/mm/huge_memory.c
>>> @@ -35,6 +35,7 @@
>>>  #include <linux/numa.h>
>>>  #include <linux/page_owner.h>
>>>  #include <linux/sched/sysctl.h>
>>> +#include <linux/memory-tiers.h>
>>>  
>>>  #include <asm/tlb.h>
>>>  #include <asm/pgalloc.h>
>>> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
>>> index 84e2be31a853..36d87dc422ab 100644
>>> --- a/mm/memory-tiers.c
>>> +++ b/mm/memory-tiers.c
>>> @@ -30,6 +30,7 @@ static DEFINE_MUTEX(memory_tier_lock);
>>>  static LIST_HEAD(memory_tiers);
>>>  struct memory_dev_type *node_memory_types[MAX_NUMNODES];
>>>  #ifdef CONFIG_MIGRATION
>>> +static int top_tier_adistance;
>>>  /*
>>>   * node_demotion[] examples:
>>>   *
>>> @@ -159,6 +160,31 @@ static struct memory_tier *__node_get_memory_tier(int node)
>>>  }
>>>  
>>>  #ifdef CONFIG_MIGRATION
>>> +bool node_is_toptier(int node)
>>> +{
>>> +	bool toptier;
>>> +	pg_data_t *pgdat;
>>> +	struct memory_tier *memtier;
>>> +
>>> +	pgdat = NODE_DATA(node);
>>> +	if (!pgdat)
>>> +		return false;
>>> +
>>> +	rcu_read_lock();
>>> +	memtier = rcu_dereference(pgdat->memtier);
>>> +	if (!memtier) {
>>> +		toptier = true;
>>> +		goto out;
>>> +	}
>>> +	if (memtier->adistance_start <= top_tier_adistance)
>>> +		toptier = true;
>>> +	else
>>> +		toptier = false;
>>> +out:
>>> +	rcu_read_unlock();
>>> +	return toptier;
>>> +}
>>> +
>>>  void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
>>>  {
>>>  	struct memory_tier *memtier;
>>> @@ -315,6 +341,22 @@ static void establish_demotion_targets(void)
>>>  			}
>>>  		} while (1);
>>>  	}
>>> +	/*
>>> +	 * Promotion is allowed from a memory tier to higher
>>> +	 * memory tier only if the memory tier doesn't include
>>> +	 * compute. We want to skip promotion from a memory tier
>>> +	 * if any node that is part of the memory tier has CPUs.
>>> +	 * Once we detect such a memory tier, we consider that tier
>>> +	 * as the top tier, from which promotion is not allowed.
>>> +	 */
>>> +	list_for_each_entry(memtier, &memory_tiers, list) {
>>> +		tier_nodes = get_memtier_nodemask(memtier);
>>> +		nodes_and(tier_nodes, node_states[N_CPU], tier_nodes);
>>> +		if (!nodes_empty(tier_nodes)) {
>>> +			top_tier_adistance = memtier->adistance_start;
>>
>> IMHO, this should be,
>>
>> 			top_tier_adistance = memtier->adistance_start + MEMTIER_CHUNK_SIZE;
>>
> 
> Good catch. Will update. BTW i did send v12 version of the patchset already to the list. 
> 
>

Checking this again, we consider a node top tier if the node's memtier abstract distance
satisfies the condition below.

	if (memtier->adistance_start <= top_tier_adistance)
		toptier = true;
	
With that we should be good with the current code. But I agree with you that top_tier_adistance
should cover the full range of the top memory tier.

-aneesh


  reply	other threads:[~2022-07-29  6:48 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-28 19:04 [PATCH v11 0/8] mm/demotion: Memory tiers and demotion Aneesh Kumar K.V
2022-07-28 19:04 ` [PATCH v11 1/8] mm/demotion: Add support for explicit memory tiers Aneesh Kumar K.V
2022-07-29  6:25   ` Huang, Ying
2022-07-29  7:24     ` Aneesh Kumar K.V
2022-08-02  2:50   ` Dan Williams
2022-08-02  3:16     ` Huang, Ying
2022-08-02  3:40       ` Dan Williams
2022-08-02  5:03         ` Aneesh Kumar K V
2022-08-02  6:57         ` Huang, Ying
2022-08-02  9:34           ` Aneesh Kumar K V
2022-08-04  0:56             ` Huang, Ying
2022-08-04  4:49               ` Aneesh Kumar K V
2022-08-04  5:19                 ` Huang, Ying
2022-07-28 19:04 ` [PATCH v11 2/8] mm/demotion: Move memory demotion related code Aneesh Kumar K.V
2022-07-28 19:04 ` [PATCH v11 3/8] mm/demotion: Add hotplug callbacks to handle new numa node onlined Aneesh Kumar K.V
2022-07-28 19:04 ` [PATCH v11 4/8] mm/demotion/dax/kmem: Set node's abstract distance to MEMTIER_ADISTANCE_PMEM Aneesh Kumar K.V
2022-07-29  6:20   ` Huang, Ying
2022-07-29  7:19     ` Aneesh Kumar K.V
2022-08-01  2:06       ` Huang, Ying
2022-08-01  4:40         ` Aneesh Kumar K V
2022-08-01  5:10           ` Huang, Ying
2022-08-01  5:38             ` Aneesh Kumar K V
2022-08-01  6:37               ` Huang, Ying
2022-08-01  6:55                 ` Aneesh Kumar K V
2022-08-01  7:13                   ` Huang, Ying
2022-08-01  7:41                     ` Aneesh Kumar K V
2022-08-02  1:58                       ` Huang, Ying
2022-07-28 19:04 ` [PATCH v11 5/8] mm/demotion: Build demotion targets based on explicit memory tiers Aneesh Kumar K.V
2022-07-29  6:35   ` Huang, Ying
2022-07-29  7:22     ` Aneesh Kumar K.V
2022-08-01  2:15       ` Huang, Ying
2022-07-28 19:04 ` [PATCH v11 6/8] mm/demotion: Add pg_data_t member to track node memory tier details Aneesh Kumar K.V
2022-07-28 19:04 ` [PATCH v11 7/8] mm/demotion: Demote pages according to allocation fallback order Aneesh Kumar K.V
2022-07-28 19:04 ` [PATCH v11 8/8] mm/demotion: Update node_is_toptier to work with memory tiers Aneesh Kumar K.V
2022-07-29  6:39   ` Huang, Ying
2022-07-29  6:41     ` Aneesh Kumar K V
2022-07-29  6:47       ` Aneesh Kumar K V [this message]
2022-08-01  1:04         ` Huang, Ying
2022-07-29  5:30 ` [PATCH v11 0/8] mm/demotion: Memory tiers and demotion Huang, Ying
2022-07-29  6:17   ` Aneesh Kumar K.V

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=e213cd43-e451-e837-b591-08e3400dd22e@linux.ibm.com \
    --to=aneesh.kumar@linux.ibm.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=dave@stgolabs.net \
    --cc=hannes@cmpxchg.org \
    --cc=hesham.almatary@huawei.com \
    --cc=jvgediya.oss@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=shy828301@gmail.com \
    --cc=tim.c.chen@intel.com \
    --cc=weixugc@google.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.