Linux-HyperV List
 help / color / mirror / Atom feed
From: Jork Loeser <jloeser@linux.microsoft.com>
To: linux-hyperv@vger.kernel.org, linux-mm@kvack.org,
	kexec@lists.infradead.org
Cc: "K. Y. Srinivasan" <kys@microsoft.com>,
	Haiyang Zhang <haiyangz@microsoft.com>,
	Wei Liu <wei.liu@kernel.org>, Dexuan Cui <decui@microsoft.com>,
	Long Li <longli@microsoft.com>, Mike Rapoport <rppt@kernel.org>,
	Pasha Tatashin <pasha.tatashin@soleen.com>,
	Pratyush Yadav <pratyush@kernel.org>,
	Alexander Graf <graf@amazon.com>, Jason Miu <jasonmiu@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@kernel.org>,
	Muchun Song <muchun.song@linux.dev>,
	Oscar Salvador <osalvador@suse.de>, Baoquan He <bhe@redhat.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will@kernel.org>, Thomas Gleixner <tglx@kernel.org>,
	Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	"H. Peter Anvin" <hpa@zytor.com>, Kees Cook <kees@kernel.org>,
	Ran Xiaokai <ran.xiaokai@zte.com.cn>,
	Justinien Bouron <jbouron@amazon.com>,
	Sourabh Jain <sourabhjain@linux.ibm.com>,
	Pingfan Liu <piliu@redhat.com>,
	"Rafael J. Wysocki" <rafael.j.wysocki@intel.com>,
	Mario Limonciello <mario.limonciello@amd.com>,
	linux-arm-kernel@lists.infradead.org, x86@kernel.org,
	linux-kernel@vger.kernel.org,
	Michael Kelley <mhklinux@outlook.com>,
	Jork Loeser <jloeser@linux.microsoft.com>
Subject: [RFC PATCH 14/20] kho: Add crash-kernel-safe radix tree presence check
Date: Wed, 27 May 2026 17:41:56 -0700	[thread overview]
Message-ID: <20260528004204.1484584-15-jloeser@linux.microsoft.com> (raw)
In-Reply-To: <20260528004204.1484584-1-jloeser@linux.microsoft.com>

In the crash kernel, the old kernel's memory is outside the direct
map. Add a lookup-only radix tree variant that, during init, memremaps
the nodes and rewrites their table entries from physical to virtual
addresses in-place, so that subsequent page presence checks can
traverse the tree with plain pointer dereferencing.

This will be used by the MSHV driver to exclude hypervisor-owned pages
from /proc/vmcore via a pfn_is_ram() callback.

Signed-off-by: Jork Loeser <jloeser@linux.microsoft.com>
---
 include/linux/kho_radix_tree.h     |  30 +++++++
 kernel/liveupdate/kexec_handover.c | 124 +++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)

diff --git a/include/linux/kho_radix_tree.h b/include/linux/kho_radix_tree.h
index 4fe2238e1e30..e906a874e612 100644
--- a/include/linux/kho_radix_tree.h
+++ b/include/linux/kho_radix_tree.h
@@ -49,6 +49,19 @@ struct kho_radix_walk_cb {
 	int (*table)(phys_addr_t phys, void *data);
 };
 
+/**
+ * struct kho_radix_crash_tree - Lookup-only radix tree for crash kernel use.
+ * @root: memremap()ed root node, or NULL if kho_radix_crash_init() failed
+ *
+ * In the crash kernel, the old kernel's memory is not in the direct map.
+ * This variant uses memremap() during init to map the tree nodes and
+ * converts the physical address table entries to virtual addresses in-place,
+ * enabling efficient pointer-based traversal without per-lookup remapping.
+ */
+struct kho_radix_crash_tree {
+	struct kho_radix_node *root;
+};
+
 #ifdef CONFIG_KEXEC_HANDOVER
 
 int kho_radix_add_key(struct kho_radix_tree *tree, unsigned long key);
@@ -59,6 +72,11 @@ int kho_radix_init_tree(struct kho_radix_tree *tree, struct kho_radix_node *root
 void kho_radix_destroy_tree(struct kho_radix_tree *tree);
 int kho_radix_tree_freeze(struct kho_radix_tree *tree);
 
+int kho_radix_crash_init(struct kho_radix_crash_tree *tree, phys_addr_t root_pa);
+
+bool kho_radix_crash_contains_page(struct kho_radix_crash_tree *tree,
+				   unsigned long pfn, unsigned int order);
+
 #else  /* #ifdef CONFIG_KEXEC_HANDOVER */
 
 static inline int kho_radix_add_key(struct kho_radix_tree *tree, unsigned long key)
@@ -91,6 +109,18 @@ static inline int kho_radix_tree_freeze(struct kho_radix_tree *tree)
 	return -EOPNOTSUPP;
 }
 
+static inline int kho_radix_crash_init(struct kho_radix_crash_tree *tree,
+				       phys_addr_t root_pa)
+{
+	return -EOPNOTSUPP;	/* stub: KHO is not built in */
+}
+
+static inline bool kho_radix_crash_contains_page(
+					struct kho_radix_crash_tree *tree,
+					unsigned long pfn, unsigned int order)
+{
+	return false;	/* stub: KHO is not built in */
+}
 #endif /* #ifdef CONFIG_KEXEC_HANDOVER */
 
 #endif	/* _LINUX_KHO_RADIX_TREE_H */
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 2e2b4e73f00d..0dfdf0f9781e 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -15,6 +15,7 @@
 #include <linux/kmemleak.h>
 #include <linux/count_zeros.h>
 #include <linux/kasan.h>
+#include <linux/io.h>
 #include <linux/kexec.h>
 #include <linux/kexec_handover.h>
 #include <linux/kho_radix_tree.h>
@@ -396,6 +397,129 @@ void kho_radix_destroy_tree(struct kho_radix_tree *tree)
 }
 EXPORT_SYMBOL_GPL(kho_radix_destroy_tree);
 
+/*
+ * memremap() each non-zero child PA in @node->table and store the VA in its
+ * place, recursing while @level > 1. Returns 0, -EINVAL for a bad PA, or
+ * -ENOMEM. On failure, earlier mappings are deliberately left in place: the
+ * crash kernel reboots after dump collection, so the leak is inconsequential.
+ */
+static int kho_radix_crash_convert_node(struct kho_radix_node *node,
+					unsigned int level)
+{
+	struct kho_radix_node *child;
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < (1 << KHO_TABLE_SIZE_LOG2); i++) {
+		if (!node->table[i])
+			continue;
+
+		/* Validate: PA must have bit 63 clear and be page-aligned */
+		if ((node->table[i] & BIT_ULL(63)) ||
+		    (node->table[i] & (PAGE_SIZE - 1)))
+			return -EINVAL;
+
+		child = memremap(node->table[i], PAGE_SIZE, MEMREMAP_WB);
+		if (!child)
+			return -ENOMEM;
+
+		/* Overwrite PA with VA in-place */
+		node->table[i] = (u64)(uintptr_t)child;
+
+		/* Recurse for intermediate levels; level 1 children are leaves */
+		if (level > 1) {
+			err = kho_radix_crash_convert_node(child, level - 1);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * kho_radix_crash_init - Initialize a crash-kernel view of a KHO radix tree.
+ * @tree: The crash tree to initialize; @tree->root stays NULL on failure.
+ * @root_pa: Physical address of the radix tree root from the old kernel.
+ *
+ * Maps the old kernel's radix tree into the crash kernel's address space
+ * by memremapping each node and converting table entries from physical to
+ * virtual addresses in-place, i.e. the old kernel's nodes are modified.
+ * After successful initialization, the tree can be queried with
+ * kho_radix_crash_contains_page() using direct pointer dereferencing.
+ *
+ * This function is intended for use in the crash kernel where the old
+ * kernel's memory is not in the direct map. No locking is used as the
+ * crash kernel is effectively single-threaded during dump collection.
+ *
+ * Return: 0 on success, -EINVAL on a bad PA, -ENOMEM if memremap() fails.
+ */
+int kho_radix_crash_init(struct kho_radix_crash_tree *tree, phys_addr_t root_pa)
+{
+	struct kho_radix_node *root;
+	int err;
+
+	tree->root = NULL;
+
+	if (!root_pa || (root_pa & (PAGE_SIZE - 1)))
+		return -EINVAL;
+
+	root = memremap(root_pa, PAGE_SIZE, MEMREMAP_WB);
+	if (!root)
+		return -ENOMEM;
+
+	err = kho_radix_crash_convert_node(root, KHO_TREE_MAX_DEPTH - 1);
+	if (err)
+		return err;
+
+	tree->root = root;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kho_radix_crash_init);
+
+/**
+ * kho_radix_crash_contains_page - Check if a page is in a crash-kernel radix tree.
+ * @tree: The crash tree, previously initialized with kho_radix_crash_init().
+ * @pfn: The page frame number to check.
+ * @order: The order of the page.
+ *
+ * Traverses the radix tree using direct pointer dereferencing (the table
+ * entries were converted from PA to VA during init). No locking is used as the
+ * crash kernel is effectively single-threaded during dump collection.
+ *
+ * Note: This function checks specifically for the presence of the page at the
+ * given order. If a larger order page that encompasses this page is preserved,
+ * this function will return false.
+ *
+ * Return: true if present at @order; false if absent or @tree->root is NULL.
+ */
+bool kho_radix_crash_contains_page(struct kho_radix_crash_tree *tree,
+				   unsigned long pfn, unsigned int order)
+{
+	unsigned long key = kho_encode_radix_key(PFN_PHYS(pfn), order);
+	struct kho_radix_node *node = tree->root;
+	struct kho_radix_leaf *leaf;
+	unsigned int i, idx;
+
+	if (!tree->root)
+		return false;
+
+	/* Traverse using VA pointers stored in table[] */
+	for (i = KHO_TREE_MAX_DEPTH - 1; i > 0; i--) {
+		idx = kho_radix_get_table_index(key, i);
+
+		if (!node->table[idx])
+			return false;
+
+		node = (struct kho_radix_node *)(uintptr_t)node->table[idx];
+	}
+
+	leaf = (struct kho_radix_leaf *)node;
+	idx = kho_radix_get_bitmap_index(key);
+	return test_bit(idx, leaf->bitmap);
+}
+EXPORT_SYMBOL_GPL(kho_radix_crash_contains_page);
+
 static int kho_radix_walk_leaf(struct kho_radix_leaf *leaf, unsigned long key,
 			       const struct kho_radix_walk_cb *cb, void *data)
 {
-- 
2.43.0


  parent reply	other threads:[~2026-05-28  0:42 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-28  0:41 [RFC PATCH 00/20] mshv: enable kexec with Hyper-V donated pages and partitions Jork Loeser
2026-05-28  0:41 ` [RFC PATCH 01/20] kho: generalize radix tree APIs Jork Loeser
2026-05-28  1:22   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 02/20] kho: store incoming radix tree in kho_in Jork Loeser
2026-05-28  1:08   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 03/20] kho: add a struct for radix callbacks Jork Loeser
2026-05-28  0:41 ` [RFC PATCH 04/20] kho: add callback for table pages Jork Loeser
2026-05-28  1:33   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 05/20] kho: add data argument to radix walk callback Jork Loeser
2026-05-28  1:11   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 06/20] kho: allow early-boot usage of the KHO radix tree Jork Loeser
2026-05-28  1:40   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 07/20] kho: allow destroying " Jork Loeser
2026-05-28  0:41 ` [RFC PATCH 08/20] kho: add kho_radix_init_tree() Jork Loeser
2026-05-28  1:21   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 09/20] memblock: introduce MEMBLOCK_KHO_SCRATCH_EXT Jork Loeser
2026-05-28  0:41 ` [RFC PATCH 10/20] kho: extended scratch Jork Loeser
2026-05-28  1:21   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 11/20] kho: return virtual address of mem_map Jork Loeser
2026-05-28  1:27   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 12/20] mm/hugetlb: make bootmem allocation work with KHO Jork Loeser
2026-05-28  1:06   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 13/20] kho: add radix tree freeze and del_key() error reporting Jork Loeser
2026-05-28  1:34   ` sashiko-bot
2026-05-28  0:41 ` Jork Loeser [this message]
2026-05-28  1:27   ` [RFC PATCH 14/20] kho: Add crash-kernel-safe radix tree presence check sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 15/20] mshv: Use page tracker to manage MSHV-owned pages and preserve with KHO Jork Loeser
2026-05-28  1:41   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 16/20] mshv: Add debugfs interface to page tracker Jork Loeser
2026-05-28  1:48   ` sashiko-bot
2026-05-28  0:41 ` [RFC PATCH 17/20] hyperv: Reserve crash MSR P2 for page preservation root PA Jork Loeser
2026-05-28  1:34   ` sashiko-bot
2026-05-28  0:42 ` [RFC PATCH 18/20] mshv: Exclude Hyper-V donated pages from crash dump collection Jork Loeser
2026-05-28  2:13   ` sashiko-bot
2026-05-28  0:42 ` [RFC PATCH 19/20] kexec: export kexec_in_progress for modules Jork Loeser
2026-05-28  0:42 ` [RFC PATCH 20/20] mshv: freeze and vacuum partitions across kexec Jork Loeser
2026-05-28  2:11   ` sashiko-bot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260528004204.1484584-15-jloeser@linux.microsoft.com \
    --to=jloeser@linux.microsoft.com \
    --cc=akpm@linux-foundation.org \
    --cc=bhe@redhat.com \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@kernel.org \
    --cc=decui@microsoft.com \
    --cc=graf@amazon.com \
    --cc=haiyangz@microsoft.com \
    --cc=hpa@zytor.com \
    --cc=jasonmiu@google.com \
    --cc=jbouron@amazon.com \
    --cc=kees@kernel.org \
    --cc=kexec@lists.infradead.org \
    --cc=kys@microsoft.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-hyperv@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longli@microsoft.com \
    --cc=mario.limonciello@amd.com \
    --cc=mhklinux@outlook.com \
    --cc=mingo@redhat.com \
    --cc=muchun.song@linux.dev \
    --cc=osalvador@suse.de \
    --cc=pasha.tatashin@soleen.com \
    --cc=piliu@redhat.com \
    --cc=pratyush@kernel.org \
    --cc=rafael.j.wysocki@intel.com \
    --cc=ran.xiaokai@zte.com.cn \
    --cc=rppt@kernel.org \
    --cc=sourabhjain@linux.ibm.com \
    --cc=tglx@kernel.org \
    --cc=wei.liu@kernel.org \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox