* [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block()
2025-07-02 7:39 [PATCHv2 0/3] mm/memory_hotplug: fixup crash during uevent handling Hannes Reinecke
@ 2025-07-02 7:39 ` Hannes Reinecke
2025-07-03 11:59 ` Donet Tom
0 siblings, 1 reply; 12+ messages in thread
From: Hannes Reinecke @ 2025-07-02 7:39 UTC (permalink / raw)
To: David Hildenbrand; +Cc: Oscar Salvador, linux-mm, Hannes Reinecke
Add a 'nid' parameter to add_memory_block() to initialize the memory
block with the correct node id.
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
---
drivers/base/memory.c | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index ed3e69dc785c..2b951e5f8a27 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -822,7 +822,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
}
#endif
-static int add_memory_block(unsigned long block_id, unsigned long state,
+static int add_memory_block(unsigned long block_id, int nid, unsigned long state,
struct vmem_altmap *altmap,
struct memory_group *group)
{
@@ -840,7 +840,7 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
mem->start_section_nr = block_id * sections_per_block;
mem->state = state;
- mem->nid = NUMA_NO_NODE;
+ mem->nid = nid;
mem->altmap = altmap;
INIT_LIST_HEAD(&mem->group_next);
@@ -867,13 +867,6 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
return 0;
}
-static int add_hotplug_memory_block(unsigned long block_id,
- struct vmem_altmap *altmap,
- struct memory_group *group)
-{
- return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
-}
-
static void remove_memory_block(struct memory_block *memory)
{
if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
@@ -913,7 +906,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
return -EINVAL;
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
- ret = add_hotplug_memory_block(block_id, altmap, group);
+ ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_OFFLINE, altmap, group);
if (ret)
break;
}
@@ -1018,7 +1011,7 @@ void __init memory_dev_init(void)
continue;
block_id = memory_block_id(nr);
- ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL);
+ ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL);
if (ret) {
panic("%s() failed to add memory block: %d\n",
__func__, ret);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block()
2025-07-02 7:39 ` [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block() Hannes Reinecke
@ 2025-07-03 11:59 ` Donet Tom
0 siblings, 0 replies; 12+ messages in thread
From: Donet Tom @ 2025-07-03 11:59 UTC (permalink / raw)
To: Hannes Reinecke, David Hildenbrand; +Cc: Oscar Salvador, linux-mm
On 7/2/25 1:09 PM, Hannes Reinecke wrote:
> Add a 'nid' parameter to add_memory_block() to initialize the memory
> block with the correct node id.
>
> Signed-off-by: Hannes Reinecke <hare@kernel.org>
> Acked-by: David Hildenbrand <david@redhat.com>
> Acked-by: Oscar Salvador <osalvador@suse.de>
> ---
> drivers/base/memory.c | 15 ++++-----------
> 1 file changed, 4 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index ed3e69dc785c..2b951e5f8a27 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -822,7 +822,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
> }
> #endif
>
> -static int add_memory_block(unsigned long block_id, unsigned long state,
> +static int add_memory_block(unsigned long block_id, int nid, unsigned long state,
> struct vmem_altmap *altmap,
> struct memory_group *group)
> {
> @@ -840,7 +840,7 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
>
> mem->start_section_nr = block_id * sections_per_block;
> mem->state = state;
> - mem->nid = NUMA_NO_NODE;
> + mem->nid = nid;
> mem->altmap = altmap;
> INIT_LIST_HEAD(&mem->group_next);
>
> @@ -867,13 +867,6 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
> return 0;
> }
>
> -static int add_hotplug_memory_block(unsigned long block_id,
> - struct vmem_altmap *altmap,
> - struct memory_group *group)
> -{
> - return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
> -}
> -
> static void remove_memory_block(struct memory_block *memory)
> {
> if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
> @@ -913,7 +906,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
> return -EINVAL;
>
> for (block_id = start_block_id; block_id != end_block_id; block_id++) {
> - ret = add_hotplug_memory_block(block_id, altmap, group);
> + ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_OFFLINE, altmap, group);
> if (ret)
> break;
> }
> @@ -1018,7 +1011,7 @@ void __init memory_dev_init(void)
> continue;
>
> block_id = memory_block_id(nr);
> - ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL);
> + ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL);
> if (ret) {
> panic("%s() failed to add memory block: %d\n",
> __func__, ret);
This looks good to me. Feel free to add
Reviewed-by: Donet Tom <donettom@linux.ibm.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCHv3 0/3] mm/memory_hotplug: fixup crash during uevent handling
@ 2025-07-04 6:34 Hannes Reinecke
2025-07-04 6:34 ` [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block() Hannes Reinecke
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Hannes Reinecke @ 2025-07-04 6:34 UTC (permalink / raw)
To: David Hildenbrand; +Cc: Oscar Salvador, linux-mm, Hannes Reinecke
Hi all,
we have some udev rules trying to read the sysfs attribute 'valid_zones' during
an memory 'add' event, causing a crash in zone_for_pfn_range(). Debugging found
that mem->nid was set to NUMA_NO_NODE, which crashed in NODE_DATA(nid).
Further analysis revealed that we're running into a race with udev event
processing: add_memory_resource() has this function calls:
1) __try_online_node()
2) arch_add_memory()
3) create_memory_block_devices()
-> calls device_register() -> memory 'add' event
4) node_set_online()/__register_one_node()
-> calls device_register() -> node 'add' event
5) register_memory_blocks_under_node()
-> sets mem->nid
Which, to the uninitated, is ... weird ...
Why do we try to online the node in 1), but only register
the node in 4) _after_ we have created the memory blocks in 3) ?
And why do we set the 'nid' value in 5), when the uevent
(which might need to see the correct 'nid' value) is sent out
in 3) ?
There must be a reason, I'm sure ...
So here's a small patchset to fixup uevent ordering.
The first patch adds a 'nid' parameter to add_memory_blocks()
(to avoid mem->nid being initialized with NUMA_NO_NODE), and
the second patch reshuffles the code in add_memory_resource()
to fully initialize the node prior to calling
create_memory_block_devices() so that the node is valid at
that time and uevent processing will see correct values in sysfs.
As usual, comments and reviews are welcome.
Changes to the original submission:
- Add patch to rename memory_block_add_nid()
- Add reviews
Changes to v2:
- Move changes to nid setting into the last patch
- Add reviews from David Hildenbrand
Hannes Reinecke (3):
drivers/base/memory: add node id parameter to add_memory_block()
mm/memory_hotplug: activate node before adding new memory blocks
drivers/base: move memory_block_add_nid() into the caller
drivers/base/memory.c | 53 ++++++++++++++++++------------------------
drivers/base/node.c | 10 ++++----
include/linux/memory.h | 5 ++--
mm/memory_hotplug.c | 32 +++++++++++++------------
4 files changed, 46 insertions(+), 54 deletions(-)
--
2.43.0
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block()
2025-07-04 6:34 [PATCHv3 0/3] mm/memory_hotplug: fixup crash during uevent handling Hannes Reinecke
@ 2025-07-04 6:34 ` Hannes Reinecke
2025-07-04 6:34 ` [PATCH 2/3] mm/memory_hotplug: activate node before adding new memory blocks Hannes Reinecke
2025-07-04 6:34 ` [PATCH 3/3] drivers/base: move memory_block_add_nid() into the caller Hannes Reinecke
2 siblings, 0 replies; 12+ messages in thread
From: Hannes Reinecke @ 2025-07-04 6:34 UTC (permalink / raw)
To: David Hildenbrand; +Cc: Oscar Salvador, linux-mm, Hannes Reinecke, Donet Tom
Add a 'nid' parameter to add_memory_block() to initialize the memory
block with the correct node id.
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Donet Tom <donettom@linux.ibm.com>
---
drivers/base/memory.c | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index ed3e69dc785c..2b951e5f8a27 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -822,7 +822,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
}
#endif
-static int add_memory_block(unsigned long block_id, unsigned long state,
+static int add_memory_block(unsigned long block_id, int nid, unsigned long state,
struct vmem_altmap *altmap,
struct memory_group *group)
{
@@ -840,7 +840,7 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
mem->start_section_nr = block_id * sections_per_block;
mem->state = state;
- mem->nid = NUMA_NO_NODE;
+ mem->nid = nid;
mem->altmap = altmap;
INIT_LIST_HEAD(&mem->group_next);
@@ -867,13 +867,6 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
return 0;
}
-static int add_hotplug_memory_block(unsigned long block_id,
- struct vmem_altmap *altmap,
- struct memory_group *group)
-{
- return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
-}
-
static void remove_memory_block(struct memory_block *memory)
{
if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
@@ -913,7 +906,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
return -EINVAL;
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
- ret = add_hotplug_memory_block(block_id, altmap, group);
+ ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_OFFLINE, altmap, group);
if (ret)
break;
}
@@ -1018,7 +1011,7 @@ void __init memory_dev_init(void)
continue;
block_id = memory_block_id(nr);
- ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL);
+ ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL);
if (ret) {
panic("%s() failed to add memory block: %d\n",
__func__, ret);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 2/3] mm/memory_hotplug: activate node before adding new memory blocks
2025-07-04 6:34 [PATCHv3 0/3] mm/memory_hotplug: fixup crash during uevent handling Hannes Reinecke
2025-07-04 6:34 ` [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block() Hannes Reinecke
@ 2025-07-04 6:34 ` Hannes Reinecke
2025-07-04 8:25 ` Oscar Salvador
2025-07-04 10:29 ` Donet Tom
2025-07-04 6:34 ` [PATCH 3/3] drivers/base: move memory_block_add_nid() into the caller Hannes Reinecke
2 siblings, 2 replies; 12+ messages in thread
From: Hannes Reinecke @ 2025-07-04 6:34 UTC (permalink / raw)
To: David Hildenbrand; +Cc: Oscar Salvador, linux-mm, Hannes Reinecke
The sysfs attributes for memory blocks require the node ID to be
set and initialized, so move the node activation before adding
new memory blocks. This also has the nice side effect that the
BUG_ON() can be converted into a WARN_ON() as we now can handle
registration errors.
Fixes: b9ff036082cd ("mm/memory_hotplug.c: make add_memory_resource use __try_online_node")
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
---
drivers/base/memory.c | 4 ++--
include/linux/memory.h | 2 +-
mm/memory_hotplug.c | 32 +++++++++++++++++---------------
3 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 2b951e5f8a27..4ce93fb54525 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -892,7 +892,7 @@ static void remove_memory_block(struct memory_block *memory)
* Called under device_hotplug_lock.
*/
int create_memory_block_devices(unsigned long start, unsigned long size,
- struct vmem_altmap *altmap,
+ int nid, struct vmem_altmap *altmap,
struct memory_group *group)
{
const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
@@ -906,7 +906,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
return -EINVAL;
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
- ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_OFFLINE, altmap, group);
+ ret = add_memory_block(block_id, nid, MEM_OFFLINE, altmap, group);
if (ret)
break;
}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 5ec4e6d209b9..d7c3a4856031 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -161,7 +161,7 @@ static inline unsigned long memory_block_advised_max_size(void)
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
int create_memory_block_devices(unsigned long start, unsigned long size,
- struct vmem_altmap *altmap,
+ int nid, struct vmem_altmap *altmap,
struct memory_group *group);
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern void memory_dev_init(void);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b1caedbade5b..204bd6f19d8d 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1478,7 +1478,7 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
}
/* create memory block devices after memory was added */
- ret = create_memory_block_devices(cur_start, memblock_size,
+ ret = create_memory_block_devices(cur_start, memblock_size, nid,
params.altmap, group);
if (ret) {
arch_remove_memory(cur_start, memblock_size, NULL);
@@ -1540,8 +1540,16 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
ret = __try_online_node(nid, false);
if (ret < 0)
- goto error;
- new_node = ret;
+ goto error_memblock_remove;
+ if (ret) {
+ node_set_online(nid);
+ ret = __register_one_node(nid);
+ if (WARN_ON(ret)) {
+ node_set_offline(nid);
+ goto error_memblock_remove;
+ }
+ new_node = true;
+ }
/*
* Self hosted memmap array
@@ -1557,24 +1565,13 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
goto error;
/* create memory block devices after memory was added */
- ret = create_memory_block_devices(start, size, NULL, group);
+ ret = create_memory_block_devices(start, size, nid, NULL, group);
if (ret) {
arch_remove_memory(start, size, params.altmap);
goto error;
}
}
- if (new_node) {
- /* If sysfs file of new node can't be created, cpu on the node
- * can't be hot-added. There is no rollback way now.
- * So, check by BUG_ON() to catch it reluctantly..
- * We online node here. We can't roll back from here.
- */
- node_set_online(nid);
- ret = __register_one_node(nid);
- BUG_ON(ret);
- }
-
register_memory_blocks_under_node(nid, PFN_DOWN(start),
PFN_UP(start + size - 1),
MEMINIT_HOTPLUG);
@@ -1599,6 +1596,11 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
return ret;
error:
+ if (new_node) {
+ node_set_offline(nid);
+ unregister_one_node(nid);
+ }
+error_memblock_remove:
if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
memblock_remove(start, size);
error_mem_hotplug_end:
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH 3/3] drivers/base: move memory_block_add_nid() into the caller
2025-07-04 6:34 [PATCHv3 0/3] mm/memory_hotplug: fixup crash during uevent handling Hannes Reinecke
2025-07-04 6:34 ` [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block() Hannes Reinecke
2025-07-04 6:34 ` [PATCH 2/3] mm/memory_hotplug: activate node before adding new memory blocks Hannes Reinecke
@ 2025-07-04 6:34 ` Hannes Reinecke
2025-07-04 7:48 ` David Hildenbrand
` (2 more replies)
2 siblings, 3 replies; 12+ messages in thread
From: Hannes Reinecke @ 2025-07-04 6:34 UTC (permalink / raw)
To: David Hildenbrand; +Cc: Oscar Salvador, linux-mm, Hannes Reinecke
Now the node id only needs to be set for early memory, so move
memory_block_add_nid() into the caller and rename it into
memory_block_add_nid_early(). This allows us to further simplify
the code by dropping the 'context' argument to
do_register_memory_block_under_node().
Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
---
drivers/base/memory.c | 36 ++++++++++++++++++------------------
drivers/base/node.c | 10 ++++------
include/linux/memory.h | 3 +--
3 files changed, 23 insertions(+), 26 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 4ce93fb54525..5fb275d7ed7d 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -782,21 +782,22 @@ static struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
#ifdef CONFIG_NUMA
/**
- * memory_block_add_nid() - Indicate that system RAM falling into this memory
- * block device (partially) belongs to the given node.
+ * memory_block_add_nid_early() - Indicate that early system RAM falling into
+ * this memory block device (partially) belongs
+ * to the given node.
* @mem: The memory block device.
* @nid: The node id.
- * @context: The memory initialization context.
*
- * Indicate that system RAM falling into this memory block (partially) belongs
- * to the given node. If the context indicates ("early") that we are adding the
- * node during node device subsystem initialization, this will also properly
- * set/adjust mem->zone based on the zone ranges of the given node.
+ * Indicate that early system RAM falling into this memory block (partially)
+ * belongs to the given node. This will also properly set/adjust mem->zone based
+ * on the zone ranges of the given node.
+ *
+ * Memory hotplug handles this on memory block creation, where we can only have
+ * a single nid span a memory block.
*/
-void memory_block_add_nid(struct memory_block *mem, int nid,
- enum meminit_context context)
+void memory_block_add_nid_early(struct memory_block *mem, int nid)
{
- if (context == MEMINIT_EARLY && mem->nid != nid) {
+ if (mem->nid != nid) {
/*
* For early memory we have to determine the zone when setting
* the node id and handle multiple nodes spanning a single
@@ -810,15 +811,14 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
mem->zone = early_node_zone_for_memory_block(mem, nid);
else
mem->zone = NULL;
+ /*
+ * If this memory block spans multiple nodes, we only indicate
+ * the last processed node. If we span multiple nodes (not applicable
+ * to hotplugged memory), zone == NULL will prohibit memory offlining
+ * and consequently unplug.
+ */
+ mem->nid = nid;
}
-
- /*
- * If this memory block spans multiple nodes, we only indicate
- * the last processed node. If we span multiple nodes (not applicable
- * to hotplugged memory), zone == NULL will prohibit memory offlining
- * and consequently unplug.
- */
- mem->nid = nid;
}
#endif
diff --git a/drivers/base/node.c b/drivers/base/node.c
index c19094481630..8e3225434ae7 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -766,13 +766,10 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
}
static void do_register_memory_block_under_node(int nid,
- struct memory_block *mem_blk,
- enum meminit_context context)
+ struct memory_block *mem_blk)
{
int ret;
- memory_block_add_nid(mem_blk, nid, context);
-
ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
&mem_blk->dev.kobj,
kobject_name(&mem_blk->dev.kobj));
@@ -824,7 +821,8 @@ static int register_mem_block_under_node_early(struct memory_block *mem_blk,
if (page_nid != nid)
continue;
- do_register_memory_block_under_node(nid, mem_blk, MEMINIT_EARLY);
+ memory_block_add_nid_early(mem_blk, nid);
+ do_register_memory_block_under_node(nid, mem_blk);
return 0;
}
/* mem section does not span the specified node */
@@ -840,7 +838,7 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
{
int nid = *(int *)arg;
- do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG);
+ do_register_memory_block_under_node(nid, mem_blk);
return 0;
}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index d7c3a4856031..63f076f2d303 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -186,8 +186,7 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
})
#ifdef CONFIG_NUMA
-void memory_block_add_nid(struct memory_block *mem, int nid,
- enum meminit_context context);
+void memory_block_add_nid_early(struct memory_block *mem, int nid);
#endif /* CONFIG_NUMA */
int memory_block_advise_max_size(unsigned long size);
unsigned long memory_block_advised_max_size(void);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] drivers/base: move memory_block_add_nid() into the caller
2025-07-04 6:34 ` [PATCH 3/3] drivers/base: move memory_block_add_nid() into the caller Hannes Reinecke
@ 2025-07-04 7:48 ` David Hildenbrand
2025-07-04 8:28 ` Oscar Salvador
2025-07-04 10:30 ` Donet Tom
2 siblings, 0 replies; 12+ messages in thread
From: David Hildenbrand @ 2025-07-04 7:48 UTC (permalink / raw)
To: Hannes Reinecke; +Cc: Oscar Salvador, linux-mm
On 04.07.25 08:34, Hannes Reinecke wrote:
> Now the node id only needs to be set for early memory, so move
> memory_block_add_nid() into the caller and rename it into
> memory_block_add_nid_early(). This allows us to further simplify
> the code by dropping the 'context' argument to
> do_register_memory_block_under_node().
>
> Suggested-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Hannes Reinecke <hare@kernel.org>
> Acked-by: David Hildenbrand <david@redhat.com>
> ---
> drivers/base/memory.c | 36 ++++++++++++++++++------------------
> drivers/base/node.c | 10 ++++------
> include/linux/memory.h | 3 +--
> 3 files changed, 23 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 4ce93fb54525..5fb275d7ed7d 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -782,21 +782,22 @@ static struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
>
> #ifdef CONFIG_NUMA
> /**
> - * memory_block_add_nid() - Indicate that system RAM falling into this memory
> - * block device (partially) belongs to the given node.
> + * memory_block_add_nid_early() - Indicate that early system RAM falling into
> + * this memory block device (partially) belongs
> + * to the given node.
> * @mem: The memory block device.
> * @nid: The node id.
> - * @context: The memory initialization context.
> *
> - * Indicate that system RAM falling into this memory block (partially) belongs
> - * to the given node. If the context indicates ("early") that we are adding the
> - * node during node device subsystem initialization, this will also properly
> - * set/adjust mem->zone based on the zone ranges of the given node.
> + * Indicate that early system RAM falling into this memory block (partially)
> + * belongs to the given node. This will also properly set/adjust mem->zone based
> + * on the zone ranges of the given node.
> + *
> + * Memory hotplug handles this on memory block creation, where we can only have
> + * a single nid span a memory block.
> */
> -void memory_block_add_nid(struct memory_block *mem, int nid,
> - enum meminit_context context)
> +void memory_block_add_nid_early(struct memory_block *mem, int nid)
> {
> - if (context == MEMINIT_EARLY && mem->nid != nid) {
> + if (mem->nid != nid) {
> /*
> * For early memory we have to determine the zone when setting
> * the node id and handle multiple nodes spanning a single
> @@ -810,15 +811,14 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
> mem->zone = early_node_zone_for_memory_block(mem, nid);
> else
> mem->zone = NULL;
> + /*
> + * If this memory block spans multiple nodes, we only indicate
> + * the last processed node. If we span multiple nodes (not applicable
> + * to hotplugged memory), zone == NULL will prohibit memory offlining
> + * and consequently unplug.
> + */
> + mem->nid = nid;
> }
> -
> - /*
> - * If this memory block spans multiple nodes, we only indicate
> - * the last processed node. If we span multiple nodes (not applicable
> - * to hotplugged memory), zone == NULL will prohibit memory offlining
> - * and consequently unplug.
> - */
> - mem->nid = nid;
I'll note that this change is not required, but also doesn't hurt.
Thanks Hannes!
--
Cheers,
David / dhildenb
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/3] mm/memory_hotplug: activate node before adding new memory blocks
2025-07-04 6:34 ` [PATCH 2/3] mm/memory_hotplug: activate node before adding new memory blocks Hannes Reinecke
@ 2025-07-04 8:25 ` Oscar Salvador
2025-07-04 10:29 ` Donet Tom
1 sibling, 0 replies; 12+ messages in thread
From: Oscar Salvador @ 2025-07-04 8:25 UTC (permalink / raw)
To: Hannes Reinecke; +Cc: David Hildenbrand, linux-mm
On Fri, Jul 04, 2025 at 08:34:03AM +0200, Hannes Reinecke wrote:
> The sysfs attributes for memory blocks require the node ID to be
> set and initialized, so move the node activation before adding
> new memory blocks. This also has the nice side effect that the
> BUG_ON() can be converted into a WARN_ON() as we now can handle
> registration errors.
>
> Fixes: b9ff036082cd ("mm/memory_hotplug.c: make add_memory_resource use __try_online_node")
> Signed-off-by: Hannes Reinecke <hare@kernel.org>
> Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
--
Oscar Salvador
SUSE Labs
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] drivers/base: move memory_block_add_nid() into the caller
2025-07-04 6:34 ` [PATCH 3/3] drivers/base: move memory_block_add_nid() into the caller Hannes Reinecke
2025-07-04 7:48 ` David Hildenbrand
@ 2025-07-04 8:28 ` Oscar Salvador
2025-07-04 10:30 ` Donet Tom
2 siblings, 0 replies; 12+ messages in thread
From: Oscar Salvador @ 2025-07-04 8:28 UTC (permalink / raw)
To: Hannes Reinecke; +Cc: David Hildenbrand, linux-mm
On Fri, Jul 04, 2025 at 08:34:04AM +0200, Hannes Reinecke wrote:
> Now the node id only needs to be set for early memory, so move
> memory_block_add_nid() into the caller and rename it into
> memory_block_add_nid_early(). This allows us to further simplify
> the code by dropping the 'context' argument to
> do_register_memory_block_under_node().
>
> Suggested-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Hannes Reinecke <hare@kernel.org>
> Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
thanks!
--
Oscar Salvador
SUSE Labs
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/3] mm/memory_hotplug: activate node before adding new memory blocks
2025-07-04 6:34 ` [PATCH 2/3] mm/memory_hotplug: activate node before adding new memory blocks Hannes Reinecke
2025-07-04 8:25 ` Oscar Salvador
@ 2025-07-04 10:29 ` Donet Tom
1 sibling, 0 replies; 12+ messages in thread
From: Donet Tom @ 2025-07-04 10:29 UTC (permalink / raw)
To: Hannes Reinecke, David Hildenbrand; +Cc: Oscar Salvador, linux-mm
On 7/4/25 12:04 PM, Hannes Reinecke wrote:
> The sysfs attributes for memory blocks require the node ID to be
> set and initialized, so move the node activation before adding
> new memory blocks. This also has the nice side effect that the
> BUG_ON() can be converted into a WARN_ON() as we now can handle
> registration errors.
>
> Fixes: b9ff036082cd ("mm/memory_hotplug.c: make add_memory_resource use __try_online_node")
> Signed-off-by: Hannes Reinecke <hare@kernel.org>
> Acked-by: David Hildenbrand <david@redhat.com>
> ---
> drivers/base/memory.c | 4 ++--
> include/linux/memory.h | 2 +-
> mm/memory_hotplug.c | 32 +++++++++++++++++---------------
> 3 files changed, 20 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 2b951e5f8a27..4ce93fb54525 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -892,7 +892,7 @@ static void remove_memory_block(struct memory_block *memory)
> * Called under device_hotplug_lock.
> */
> int create_memory_block_devices(unsigned long start, unsigned long size,
> - struct vmem_altmap *altmap,
> + int nid, struct vmem_altmap *altmap,
> struct memory_group *group)
> {
> const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
> @@ -906,7 +906,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
> return -EINVAL;
>
> for (block_id = start_block_id; block_id != end_block_id; block_id++) {
> - ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_OFFLINE, altmap, group);
> + ret = add_memory_block(block_id, nid, MEM_OFFLINE, altmap, group);
> if (ret)
> break;
> }
> diff --git a/include/linux/memory.h b/include/linux/memory.h
> index 5ec4e6d209b9..d7c3a4856031 100644
> --- a/include/linux/memory.h
> +++ b/include/linux/memory.h
> @@ -161,7 +161,7 @@ static inline unsigned long memory_block_advised_max_size(void)
> extern int register_memory_notifier(struct notifier_block *nb);
> extern void unregister_memory_notifier(struct notifier_block *nb);
> int create_memory_block_devices(unsigned long start, unsigned long size,
> - struct vmem_altmap *altmap,
> + int nid, struct vmem_altmap *altmap,
> struct memory_group *group);
> void remove_memory_block_devices(unsigned long start, unsigned long size);
> extern void memory_dev_init(void);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index b1caedbade5b..204bd6f19d8d 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1478,7 +1478,7 @@ static int create_altmaps_and_memory_blocks(int nid, struct memory_group *group,
> }
>
> /* create memory block devices after memory was added */
> - ret = create_memory_block_devices(cur_start, memblock_size,
> + ret = create_memory_block_devices(cur_start, memblock_size, nid,
> params.altmap, group);
> if (ret) {
> arch_remove_memory(cur_start, memblock_size, NULL);
> @@ -1540,8 +1540,16 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
>
> ret = __try_online_node(nid, false);
> if (ret < 0)
> - goto error;
> - new_node = ret;
> + goto error_memblock_remove;
> + if (ret) {
> + node_set_online(nid);
> + ret = __register_one_node(nid);
> + if (WARN_ON(ret)) {
> + node_set_offline(nid);
> + goto error_memblock_remove;
> + }
> + new_node = true;
> + }
>
> /*
> * Self hosted memmap array
> @@ -1557,24 +1565,13 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
> goto error;
>
> /* create memory block devices after memory was added */
> - ret = create_memory_block_devices(start, size, NULL, group);
> + ret = create_memory_block_devices(start, size, nid, NULL, group);
> if (ret) {
> arch_remove_memory(start, size, params.altmap);
> goto error;
> }
> }
>
> - if (new_node) {
> - /* If sysfs file of new node can't be created, cpu on the node
> - * can't be hot-added. There is no rollback way now.
> - * So, check by BUG_ON() to catch it reluctantly..
> - * We online node here. We can't roll back from here.
> - */
> - node_set_online(nid);
> - ret = __register_one_node(nid);
> - BUG_ON(ret);
> - }
> -
> register_memory_blocks_under_node(nid, PFN_DOWN(start),
> PFN_UP(start + size - 1),
> MEMINIT_HOTPLUG);
> @@ -1599,6 +1596,11 @@ int add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
>
> return ret;
> error:
> + if (new_node) {
> + node_set_offline(nid);
> + unregister_one_node(nid);
> + }
> +error_memblock_remove:
> if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK))
> memblock_remove(start, size);
> error_mem_hotplug_end:
This looks good to me.
Reviewed-by: Donet Tom <donettom@linux.ibm.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 3/3] drivers/base: move memory_block_add_nid() into the caller
2025-07-04 6:34 ` [PATCH 3/3] drivers/base: move memory_block_add_nid() into the caller Hannes Reinecke
2025-07-04 7:48 ` David Hildenbrand
2025-07-04 8:28 ` Oscar Salvador
@ 2025-07-04 10:30 ` Donet Tom
2 siblings, 0 replies; 12+ messages in thread
From: Donet Tom @ 2025-07-04 10:30 UTC (permalink / raw)
To: Hannes Reinecke, David Hildenbrand; +Cc: Oscar Salvador, linux-mm
On 7/4/25 12:04 PM, Hannes Reinecke wrote:
> Now the node id only needs to be set for early memory, so move
> memory_block_add_nid() into the caller and rename it into
> memory_block_add_nid_early(). This allows us to further simplify
> the code by dropping the 'context' argument to
> do_register_memory_block_under_node().
>
> Suggested-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Hannes Reinecke <hare@kernel.org>
> Acked-by: David Hildenbrand <david@redhat.com>
> ---
> drivers/base/memory.c | 36 ++++++++++++++++++------------------
> drivers/base/node.c | 10 ++++------
> include/linux/memory.h | 3 +--
> 3 files changed, 23 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/base/memory.c b/drivers/base/memory.c
> index 4ce93fb54525..5fb275d7ed7d 100644
> --- a/drivers/base/memory.c
> +++ b/drivers/base/memory.c
> @@ -782,21 +782,22 @@ static struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
>
> #ifdef CONFIG_NUMA
> /**
> - * memory_block_add_nid() - Indicate that system RAM falling into this memory
> - * block device (partially) belongs to the given node.
> + * memory_block_add_nid_early() - Indicate that early system RAM falling into
> + * this memory block device (partially) belongs
> + * to the given node.
> * @mem: The memory block device.
> * @nid: The node id.
> - * @context: The memory initialization context.
> *
> - * Indicate that system RAM falling into this memory block (partially) belongs
> - * to the given node. If the context indicates ("early") that we are adding the
> - * node during node device subsystem initialization, this will also properly
> - * set/adjust mem->zone based on the zone ranges of the given node.
> + * Indicate that early system RAM falling into this memory block (partially)
> + * belongs to the given node. This will also properly set/adjust mem->zone based
> + * on the zone ranges of the given node.
> + *
> + * Memory hotplug handles this on memory block creation, where we can only have
> + * a single nid span a memory block.
> */
> -void memory_block_add_nid(struct memory_block *mem, int nid,
> - enum meminit_context context)
> +void memory_block_add_nid_early(struct memory_block *mem, int nid)
> {
> - if (context == MEMINIT_EARLY && mem->nid != nid) {
> + if (mem->nid != nid) {
> /*
> * For early memory we have to determine the zone when setting
> * the node id and handle multiple nodes spanning a single
> @@ -810,15 +811,14 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
> mem->zone = early_node_zone_for_memory_block(mem, nid);
> else
> mem->zone = NULL;
> + /*
> + * If this memory block spans multiple nodes, we only indicate
> + * the last processed node. If we span multiple nodes (not applicable
> + * to hotplugged memory), zone == NULL will prohibit memory offlining
> + * and consequently unplug.
> + */
> + mem->nid = nid;
> }
> -
> - /*
> - * If this memory block spans multiple nodes, we only indicate
> - * the last processed node. If we span multiple nodes (not applicable
> - * to hotplugged memory), zone == NULL will prohibit memory offlining
> - * and consequently unplug.
> - */
> - mem->nid = nid;
> }
> #endif
>
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index c19094481630..8e3225434ae7 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -766,13 +766,10 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
> }
>
> static void do_register_memory_block_under_node(int nid,
> - struct memory_block *mem_blk,
> - enum meminit_context context)
> + struct memory_block *mem_blk)
> {
> int ret;
>
> - memory_block_add_nid(mem_blk, nid, context);
> -
> ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
> &mem_blk->dev.kobj,
> kobject_name(&mem_blk->dev.kobj));
> @@ -824,7 +821,8 @@ static int register_mem_block_under_node_early(struct memory_block *mem_blk,
> if (page_nid != nid)
> continue;
>
> - do_register_memory_block_under_node(nid, mem_blk, MEMINIT_EARLY);
> + memory_block_add_nid_early(mem_blk, nid);
> + do_register_memory_block_under_node(nid, mem_blk);
> return 0;
> }
> /* mem section does not span the specified node */
> @@ -840,7 +838,7 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
> {
> int nid = *(int *)arg;
>
> - do_register_memory_block_under_node(nid, mem_blk, MEMINIT_HOTPLUG);
> + do_register_memory_block_under_node(nid, mem_blk);
> return 0;
> }
>
> diff --git a/include/linux/memory.h b/include/linux/memory.h
> index d7c3a4856031..63f076f2d303 100644
> --- a/include/linux/memory.h
> +++ b/include/linux/memory.h
> @@ -186,8 +186,7 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
> })
>
> #ifdef CONFIG_NUMA
> -void memory_block_add_nid(struct memory_block *mem, int nid,
> - enum meminit_context context);
> +void memory_block_add_nid_early(struct memory_block *mem, int nid);
> #endif /* CONFIG_NUMA */
> int memory_block_advise_max_size(unsigned long size);
> unsigned long memory_block_advised_max_size(void);
This looks good to me.
Reviewed-by: Donet Tom <donettom@linux.ibm.com>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block()
2025-07-29 6:46 [PATCHv4 0/3] mm/memory_hotplug: fixup crash during uevent handling Hannes Reinecke
@ 2025-07-29 6:46 ` Hannes Reinecke
0 siblings, 0 replies; 12+ messages in thread
From: Hannes Reinecke @ 2025-07-29 6:46 UTC (permalink / raw)
To: Andrew Morton
Cc: David Hildenbrand, Oscar Salvador, linux-mm, Hannes Reinecke,
Donet Tom
Add a 'nid' parameter to add_memory_block() to initialize the memory
block with the correct node id.
Signed-off-by: Hannes Reinecke <hare@kernel.org>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Donet Tom <donettom@linux.ibm.com>
---
drivers/base/memory.c | 15 ++++-----------
1 file changed, 4 insertions(+), 11 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 5c6c1d6bb59f..894d3891292b 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -809,7 +809,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
}
#endif
-static int add_memory_block(unsigned long block_id, unsigned long state,
+static int add_memory_block(unsigned long block_id, int nid, unsigned long state,
struct vmem_altmap *altmap,
struct memory_group *group)
{
@@ -827,7 +827,7 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
mem->start_section_nr = block_id * sections_per_block;
mem->state = state;
- mem->nid = NUMA_NO_NODE;
+ mem->nid = nid;
mem->altmap = altmap;
INIT_LIST_HEAD(&mem->group_next);
@@ -854,13 +854,6 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
return 0;
}
-static int add_hotplug_memory_block(unsigned long block_id,
- struct vmem_altmap *altmap,
- struct memory_group *group)
-{
- return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
-}
-
static void remove_memory_block(struct memory_block *memory)
{
if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
@@ -900,7 +893,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
return -EINVAL;
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
- ret = add_hotplug_memory_block(block_id, altmap, group);
+ ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_OFFLINE, altmap, group);
if (ret)
break;
}
@@ -1005,7 +998,7 @@ void __init memory_dev_init(void)
continue;
block_id = memory_block_id(nr);
- ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL);
+ ret = add_memory_block(block_id, NUMA_NO_NODE, MEM_ONLINE, NULL, NULL);
if (ret) {
panic("%s() failed to add memory block: %d\n",
__func__, ret);
--
2.43.0
^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2025-07-29 6:46 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-04 6:34 [PATCHv3 0/3] mm/memory_hotplug: fixup crash during uevent handling Hannes Reinecke
2025-07-04 6:34 ` [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block() Hannes Reinecke
2025-07-04 6:34 ` [PATCH 2/3] mm/memory_hotplug: activate node before adding new memory blocks Hannes Reinecke
2025-07-04 8:25 ` Oscar Salvador
2025-07-04 10:29 ` Donet Tom
2025-07-04 6:34 ` [PATCH 3/3] drivers/base: move memory_block_add_nid() into the caller Hannes Reinecke
2025-07-04 7:48 ` David Hildenbrand
2025-07-04 8:28 ` Oscar Salvador
2025-07-04 10:30 ` Donet Tom
-- strict thread matches above, loose matches on Subject: below --
2025-07-29 6:46 [PATCHv4 0/3] mm/memory_hotplug: fixup crash during uevent handling Hannes Reinecke
2025-07-29 6:46 ` [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block() Hannes Reinecke
2025-07-02 7:39 [PATCHv2 0/3] mm/memory_hotplug: fixup crash during uevent handling Hannes Reinecke
2025-07-02 7:39 ` [PATCH 1/3] drivers/base/memory: add node id parameter to add_memory_block() Hannes Reinecke
2025-07-03 11:59 ` Donet Tom
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).