From: Mike Rapoport <rppt@kernel.org>
To: Zi Yan <ziy@nvidia.com>
Cc: "Michał Cłapiński" <mclapinski@google.com>,
"Evangelos Petrongonas" <epetron@amazon.de>,
"Pasha Tatashin" <pasha.tatashin@soleen.com>,
"Pratyush Yadav" <pratyush@kernel.org>,
"Alexander Graf" <graf@amazon.com>,
"Samiullah Khawaja" <skhawaja@google.com>,
kexec@lists.infradead.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org,
"Andrew Morton" <akpm@linux-foundation.org>
Subject: Re: [PATCH v7 2/3] kho: fix deferred init of kho scratch
Date: Thu, 19 Mar 2026 09:54:05 +0200 [thread overview]
Message-ID: <aburnbP64n9axmu_@kernel.org> (raw)
In-Reply-To: <58A8B1B4-A73B-48D2-8492-A58A03634644@nvidia.com>
Hi,
On Wed, Mar 18, 2026 at 01:36:07PM -0400, Zi Yan wrote:
> On 18 Mar 2026, at 13:19, Michał Cłapiński wrote:
> > On Wed, Mar 18, 2026 at 6:08 PM Zi Yan <ziy@nvidia.com> wrote:
> >>
> >> ## Call site analysis
> >>
> >> init_pageblock_migratetype() has nine call sites. The init call ordering
> >> relevant to scratch is:
> >>
> >> ```
> >> setup_arch()
> >> zone_sizes_init() -> free_area_init() -> memmap_init_range() [1]
Hmm, this is slightly outdated, but largely correct :)
> >>
> >> mm_init_free_all() / start_kernel():
> >> kho_memory_init() -> kho_release_scratch() [2]
> >> memblock_free_all()
> >> free_low_memory_core_early()
> >> memmap_init_reserved_pages()
> >> reserve_bootmem_region() -> __init_deferred_page()
> >> -> __init_page_from_nid() [3]
> >> deferred init kthreads -> __init_page_from_nid() [4]
And this is wrong: deferred init does not call __init_page_from_nid(), only
reserve_bootmem_region() does.
And there's a case Claude missed:
hugetlb_bootmem_free_invalid_page() -> __init_page_from_nid(), which
shouldn't check for KHO. Well, at least until we have support for hugetlb
persistence, and most probably even afterwards.
I don't think we should modify reserve_bootmem_region(). If there are
reserved pages in a pageblock, it does not matter whether it's initialized to
MIGRATE_CMA. It only becomes important once the reserved pages are freed, so
we can update the pageblock migratetype in free_reserved_area().
When we boot with KHO, all memblock allocations come from scratch, so
anything freed in free_reserved_area() should become CMA again.
> >> ```
> >
> > I don't understand this. deferred_free_pages() doesn't call
> > __init_page_from_nid(). So I would clearly need to modify both
> > deferred_free_pages and __init_page_from_nid.
For deferred_free_pages() we don't need kho_scratch_overlap(): we already
have the memblock_region (almost) at hand, and it's enough to check whether
it's marked MEMBLOCK_KHO_SCRATCH.
Something along these lines (compile tested only) should do the trick:
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 3e217414e12d..b9b1e0991ec8 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -275,6 +275,8 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
__for_each_mem_range(i, &memblock.reserved, NULL, NUMA_NO_NODE, \
MEMBLOCK_NONE, p_start, p_end, NULL)
+struct memblock_region *memblock_region_from_iter(u64 iterator);
+
static inline bool memblock_is_hotpluggable(struct memblock_region *m)
{
return m->flags & MEMBLOCK_HOTPLUG;
diff --git a/mm/memblock.c b/mm/memblock.c
index ae6a5af46bd7..9cf99f32279f 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1359,6 +1359,16 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
*idx = ULLONG_MAX;
}
+__init_memblock struct memblock_region *memblock_region_from_iter(u64 iterator)
+{
+ int index = iterator & 0xffffffff;
+
+ if (index < 0 || index >= memblock.memory.cnt)
+ return NULL;
+
+ return &memblock.memory.regions[index];
+}
+
/*
* Common iterator interface used to define for_each_mem_pfn_range().
*/
diff --git a/mm/mm_init.c b/mm/mm_init.c
index cec7bb758bdd..96b25895ffbe 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1996,7 +1996,7 @@ unsigned long __init node_map_pfn_alignment(void)
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
static void __init deferred_free_pages(unsigned long pfn,
- unsigned long nr_pages)
+ unsigned long nr_pages, enum migratetype mt)
{
struct page *page;
unsigned long i;
@@ -2009,8 +2009,7 @@ static void __init deferred_free_pages(unsigned long pfn,
/* Free a large naturally-aligned chunk if possible */
if (nr_pages == MAX_ORDER_NR_PAGES && IS_MAX_ORDER_ALIGNED(pfn)) {
for (i = 0; i < nr_pages; i += pageblock_nr_pages)
- init_pageblock_migratetype(page + i, MIGRATE_MOVABLE,
- false);
+ init_pageblock_migratetype(page + i, mt, false);
__free_pages_core(page, MAX_PAGE_ORDER, MEMINIT_EARLY);
return;
}
@@ -2020,8 +2019,7 @@ static void __init deferred_free_pages(unsigned long pfn,
for (i = 0; i < nr_pages; i++, page++, pfn++) {
if (pageblock_aligned(pfn))
- init_pageblock_migratetype(page, MIGRATE_MOVABLE,
- false);
+ init_pageblock_migratetype(page, mt, false);
__free_pages_core(page, 0, MEMINIT_EARLY);
}
}
@@ -2077,6 +2075,8 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
u64 i = 0;
for_each_free_mem_range(i, nid, 0, &start, &end, NULL) {
+ struct memblock_region *region = memblock_region_from_iter(i);
+ enum migratetype mt = MIGRATE_MOVABLE;
unsigned long spfn = PFN_UP(start);
unsigned long epfn = PFN_DOWN(end);
@@ -2086,12 +2086,15 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
spfn = max(spfn, start_pfn);
epfn = min(epfn, end_pfn);
+ if (memblock_is_kho_scratch(region))
+ mt = MIGRATE_CMA;
+
while (spfn < epfn) {
unsigned long mo_pfn = ALIGN(spfn + 1, MAX_ORDER_NR_PAGES);
unsigned long chunk_end = min(mo_pfn, epfn);
nr_pages += deferred_init_pages(zone, spfn, chunk_end);
- deferred_free_pages(spfn, chunk_end - spfn);
+ deferred_free_pages(spfn, chunk_end - spfn, mt);
spfn = chunk_end;
--
Sincerely yours,
Mike.