From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org
Cc: Wei Xu <weixugc@google.com>, Huang Ying <ying.huang@intel.com>,
Yang Shi <shy828301@gmail.com>,
Davidlohr Bueso <dave@stgolabs.net>,
Tim C Chen <tim.c.chen@intel.com>,
Michal Hocko <mhocko@kernel.org>,
Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
Hesham Almatary <hesham.almatary@huawei.com>,
Dave Hansen <dave.hansen@intel.com>,
Jonathan Cameron <Jonathan.Cameron@huawei.com>,
Alistair Popple <apopple@nvidia.com>,
Dan Williams <dan.j.williams@intel.com>,
Johannes Weiner <hannes@cmpxchg.org>,
jvgediya.oss@gmail.com,
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Subject: [PATCH v11 6/8] mm/demotion: Add pg_data_t member to track node memory tier details
Date: Fri, 29 Jul 2022 00:34:34 +0530 [thread overview]
Message-ID: <20220728190436.858458-7-aneesh.kumar@linux.ibm.com> (raw)
In-Reply-To: <20220728190436.858458-1-aneesh.kumar@linux.ibm.com>
Also update different helpers to use NODE_DATA()->memtier. Since the
node-specific memtier can change when a NUMA node is reassigned to a
different memory tier, accessing NODE_DATA()->memtier needs to happen
under the RCU read lock or memory_tier_lock.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
include/linux/mmzone.h | 3 +++
mm/memory-tiers.c | 40 +++++++++++++++++++++++++++++++++++-----
2 files changed, 38 insertions(+), 5 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index aab70355d64f..353812495a70 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -928,6 +928,9 @@ typedef struct pglist_data {
/* Per-node vmstats */
struct per_cpu_nodestat __percpu *per_cpu_nodestats;
atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
+#ifdef CONFIG_NUMA
+ struct memory_tier __rcu *memtier;
+#endif
} pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 60845aa74afc..f982ca6b3559 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -3,6 +3,7 @@
#include <linux/lockdep.h>
#include <linux/memory.h>
#include <linux/random.h>
+#include <linux/mmzone.h>
#include <linux/memory-tiers.h>
#include "internal.h"
@@ -142,12 +143,18 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty
static struct memory_tier *__node_get_memory_tier(int node)
{
- struct memory_dev_type *memtype;
+ pg_data_t *pgdat;
- memtype = node_memory_types[node];
- if (memtype)
- return memtype->memtier;
- return NULL;
+ pgdat = NODE_DATA(node);
+ if (!pgdat)
+ return NULL;
+ /*
+ * Since we hold memory_tier_lock, we can avoid
+ * RCU read locks when accessing the details. No
+ * parallel updates are possible here.
+ */
+ return rcu_dereference_check(pgdat->memtier,
+ lockdep_is_held(&memory_tier_lock));
}
#ifdef CONFIG_MIGRATION
@@ -290,6 +297,7 @@ static inline void establish_demotion_targets(void) {}
static void init_node_memory_tier(int node)
{
+ pg_data_t *pgdat = NODE_DATA(node);
struct memory_tier *memtier;
mutex_lock(&memory_tier_lock);
@@ -311,8 +319,12 @@ static void init_node_memory_tier(int node)
}
memtype = node_memory_types[node];
memtier = find_create_memory_tier(memtype);
+ if (IS_ERR(memtier))
+ goto err_out;
+ rcu_assign_pointer(pgdat->memtier, memtier);
}
establish_demotion_targets();
+err_out:
mutex_unlock(&memory_tier_lock);
}
@@ -324,13 +336,26 @@ static void destroy_memory_tier(struct memory_tier *memtier)
static void clear_node_memory_tier(int node)
{
+ pg_data_t *pgdat;
struct memory_tier *current_memtier;
+ pgdat = NODE_DATA(node);
+ if (!pgdat)
+ return;
+
mutex_lock(&memory_tier_lock);
+ /*
+ * Make sure that anybody looking at NODE_DATA who finds
+ * a valid memtier finds memory_dev_types with nodes still
+ * linked to the memtier. We achieve this by waiting for
+ * rcu read section to finish using synchronize_rcu.
+ */
current_memtier = __node_get_memory_tier(node);
if (current_memtier) {
struct memory_dev_type *memtype;
+ rcu_assign_pointer(pgdat->memtier, NULL);
+ synchronize_rcu();
memtype = node_memory_types[node];
node_clear(node, memtype->nodes);
if (nodes_empty(memtype->nodes)) {
@@ -386,6 +411,7 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self,
static int __init memory_tier_init(void)
{
+ int node;
struct memory_tier *memtier;
mutex_lock(&memory_tier_lock);
@@ -396,6 +422,10 @@ static int __init memory_tier_init(void)
if (IS_ERR(memtier))
panic("%s() failed to register memory tier: %ld\n",
__func__, PTR_ERR(memtier));
+
+ for_each_node_state(node, N_MEMORY)
+ rcu_assign_pointer(NODE_DATA(node)->memtier, memtier);
+
mutex_unlock(&memory_tier_lock);
#ifdef CONFIG_MIGRATION
node_demotion = kcalloc(MAX_NUMNODES, sizeof(struct demotion_nodes),
--
2.37.1
next prev parent reply other threads:[~2022-07-28 19:06 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-28 19:04 [PATCH v11 0/8] mm/demotion: Memory tiers and demotion Aneesh Kumar K.V
2022-07-28 19:04 ` [PATCH v11 1/8] mm/demotion: Add support for explicit memory tiers Aneesh Kumar K.V
2022-07-29 6:25 ` Huang, Ying
2022-07-29 7:24 ` Aneesh Kumar K.V
2022-08-02 2:50 ` Dan Williams
2022-08-02 3:16 ` Huang, Ying
2022-08-02 3:40 ` Dan Williams
2022-08-02 5:03 ` Aneesh Kumar K V
2022-08-02 6:57 ` Huang, Ying
2022-08-02 9:34 ` Aneesh Kumar K V
2022-08-04 0:56 ` Huang, Ying
2022-08-04 4:49 ` Aneesh Kumar K V
2022-08-04 5:19 ` Huang, Ying
2022-07-28 19:04 ` [PATCH v11 2/8] mm/demotion: Move memory demotion related code Aneesh Kumar K.V
2022-07-28 19:04 ` [PATCH v11 3/8] mm/demotion: Add hotplug callbacks to handle new numa node onlined Aneesh Kumar K.V
2022-07-28 19:04 ` [PATCH v11 4/8] mm/demotion/dax/kmem: Set node's abstract distance to MEMTIER_ADISTANCE_PMEM Aneesh Kumar K.V
2022-07-29 6:20 ` Huang, Ying
2022-07-29 7:19 ` Aneesh Kumar K.V
2022-08-01 2:06 ` Huang, Ying
2022-08-01 4:40 ` Aneesh Kumar K V
2022-08-01 5:10 ` Huang, Ying
2022-08-01 5:38 ` Aneesh Kumar K V
2022-08-01 6:37 ` Huang, Ying
2022-08-01 6:55 ` Aneesh Kumar K V
2022-08-01 7:13 ` Huang, Ying
2022-08-01 7:41 ` Aneesh Kumar K V
2022-08-02 1:58 ` Huang, Ying
2022-07-28 19:04 ` [PATCH v11 5/8] mm/demotion: Build demotion targets based on explicit memory tiers Aneesh Kumar K.V
2022-07-29 6:35 ` Huang, Ying
2022-07-29 7:22 ` Aneesh Kumar K.V
2022-08-01 2:15 ` Huang, Ying
2022-07-28 19:04 ` Aneesh Kumar K.V [this message]
2022-07-28 19:04 ` [PATCH v11 7/8] mm/demotion: Demote pages according to allocation fallback order Aneesh Kumar K.V
2022-07-28 19:04 ` [PATCH v11 8/8] mm/demotion: Update node_is_toptier to work with memory tiers Aneesh Kumar K.V
2022-07-29 6:39 ` Huang, Ying
2022-07-29 6:41 ` Aneesh Kumar K V
2022-07-29 6:47 ` Aneesh Kumar K V
2022-08-01 1:04 ` Huang, Ying
2022-07-29 5:30 ` [PATCH v11 0/8] mm/demotion: Memory tiers and demotion Huang, Ying
2022-07-29 6:17 ` Aneesh Kumar K.V
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220728190436.858458-7-aneesh.kumar@linux.ibm.com \
--to=aneesh.kumar@linux.ibm.com \
--cc=Jonathan.Cameron@huawei.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=dan.j.williams@intel.com \
--cc=dave.hansen@intel.com \
--cc=dave@stgolabs.net \
--cc=hannes@cmpxchg.org \
--cc=hesham.almatary@huawei.com \
--cc=jvgediya.oss@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=shy828301@gmail.com \
--cc=tim.c.chen@intel.com \
--cc=weixugc@google.com \
--cc=ying.huang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.