From: Mike Rapoport <rppt@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexander Graf <graf@amazon.com>, Baoquan He <bhe@redhat.com>,
Changyuan Lyu <changyuanl@google.com>,
Chris Li <chrisl@kernel.org>, Jason Gunthorpe <jgg@nvidia.com>,
Mike Rapoport <rppt@kernel.org>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
Pratyush Yadav <pratyush@kernel.org>,
kexec@lists.infradead.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org
Subject: [PATCH v2 1/2] kho: add support for preserving vmalloc allocations
Date: Fri, 5 Sep 2025 16:13:01 +0300 [thread overview]
Message-ID: <20250905131302.3595582-2-rppt@kernel.org> (raw)
In-Reply-To: <20250905131302.3595582-1-rppt@kernel.org>
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
A vmalloc allocation is preserved using binary structure similar to
global KHO memory tracker. It's a linked list of pages where each page
is an array of physical address of pages in vmalloc area.
kho_preserve_vmalloc() hands out the physical address of the head page
to the caller. This address is used as the argument to
kho_vmalloc_restore() to restore the mapping in the vmalloc address
space and populate it with the preserved pages.
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
include/linux/kexec_handover.h | 12 ++
kernel/kexec_handover.c | 200 +++++++++++++++++++++++++++++++++
2 files changed, 212 insertions(+)
diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 348844cffb13..b7bf3bf11019 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -42,8 +42,10 @@ struct kho_serialization;
bool kho_is_enabled(void);
int kho_preserve_folio(struct folio *folio);
+int kho_preserve_vmalloc(void *ptr, phys_addr_t *preservation);
int kho_preserve_phys(phys_addr_t phys, size_t size);
struct folio *kho_restore_folio(phys_addr_t phys);
+void *kho_restore_vmalloc(phys_addr_t preservation);
int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
@@ -70,11 +72,21 @@ static inline int kho_preserve_phys(phys_addr_t phys, size_t size)
return -EOPNOTSUPP;
}
+static inline int kho_preserve_vmalloc(void *ptr, phys_addr_t *preservation)
+{
+ return -EOPNOTSUPP;
+}
+
static inline struct folio *kho_restore_folio(phys_addr_t phys)
{
return NULL;
}
+static inline void *kho_restore_vmalloc(phys_addr_t preservation)
+{
+ return NULL;
+}
+
static inline int kho_add_subtree(struct kho_serialization *ser,
const char *name, void *fdt)
{
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index ecd1ac210dbd..c4560ff9b1fc 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -18,6 +18,7 @@
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/page-isolation.h>
+#include <linux/vmalloc.h>
#include <asm/early_ioremap.h>
@@ -733,6 +734,205 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
}
EXPORT_SYMBOL_GPL(kho_preserve_phys);
+struct kho_vmalloc_chunk;
+
+struct kho_vmalloc_hdr {
+ DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *);
+ unsigned int total_pages; /* only valid in the first chunk */
+ unsigned int flags; /* only valid in the first chunk */
+ unsigned short order; /* only valid in the first chunk */
+ unsigned short num_elms;
+};
+
+#define KHO_VMALLOC_SIZE \
+ ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \
+ sizeof(phys_addr_t))
+
+struct kho_vmalloc_chunk {
+ struct kho_vmalloc_hdr hdr;
+ phys_addr_t phys[KHO_VMALLOC_SIZE];
+};
+
+static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE);
+
+#define KHO_VMALLOC_FLAGS_MASK (VM_ALLOC | VM_ALLOW_HUGE_VMAP)
+
+static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur)
+{
+ struct kho_vmalloc_chunk *chunk;
+ int err;
+
+ chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!chunk)
+ return NULL;
+
+ err = kho_preserve_phys(virt_to_phys(chunk), PAGE_SIZE);
+ if (err)
+ goto err_free;
+ if (cur)
+ KHOSER_STORE_PTR(cur->hdr.next, chunk);
+ return chunk;
+
+err_free:
+ kfree(chunk);
+ return NULL;
+}
+
+static void kho_vmalloc_free_chunks(struct kho_vmalloc_chunk *first_chunk)
+{
+ struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_vmalloc_chunk *chunk = first_chunk;
+
+ while (chunk) {
+ unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
+ struct kho_vmalloc_chunk *tmp = chunk;
+
+ __kho_unpreserve(track, pfn, pfn + 1);
+
+ chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
+ kfree(tmp);
+ }
+}
+
+/**
+ * kho_preserve_vmalloc - preserve memory allocated with vmalloc() across kexec
+ * @ptr: pointer to the area in vmalloc address space
+ * @preservation: returned physical address of preservation metadata
+ *
+ * Instructs KHO to preserve the area in vmalloc address space at @ptr. The
+ * physical pages mapped at @ptr will be preserved and on successful return
+ * @preservation will hold the physical address of a structure that describes
+ * the preservation.
+ *
+ * NOTE: The memory allocated with vmalloc_node() variants cannot be reliably
+ * restored on the same node
+ *
+ * Return: 0 on success, error code on failure
+ */
+int kho_preserve_vmalloc(void *ptr, phys_addr_t *preservation)
+{
+ struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_vmalloc_chunk *chunk, *first_chunk;
+ struct vm_struct *vm = find_vm_area(ptr);
+ unsigned int order, flags;
+ int err;
+
+ if (!vm)
+ return -EINVAL;
+
+ if (vm->flags & ~KHO_VMALLOC_FLAGS_MASK)
+ return -EOPNOTSUPP;
+
+ flags = vm->flags & KHO_VMALLOC_FLAGS_MASK;
+ order = get_vm_area_page_order(vm);
+
+ chunk = new_vmalloc_chunk(NULL);
+ if (!chunk)
+ return -ENOMEM;
+ first_chunk = chunk;
+ first_chunk->hdr.total_pages = vm->nr_pages;
+ first_chunk->hdr.flags = flags;
+ first_chunk->hdr.order = order;
+
+ for (int i = 0; i < vm->nr_pages; i += (1 << order)) {
+ phys_addr_t phys = page_to_phys(vm->pages[i]);
+
+ err = __kho_preserve_order(track, PHYS_PFN(phys), order);
+ if (err)
+ goto err_free;
+
+ chunk->phys[chunk->hdr.num_elms] = phys;
+ chunk->hdr.num_elms++;
+ if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->phys)) {
+ chunk = new_vmalloc_chunk(chunk);
+ if (!chunk)
+ goto err_free;
+ }
+ }
+
+ *preservation = virt_to_phys(first_chunk);
+ return 0;
+
+err_free:
+ kho_vmalloc_free_chunks(first_chunk);
+ return err;
+}
+EXPORT_SYMBOL_GPL(kho_preserve_vmalloc);
+
+/**
+ * kho_restore_vmalloc - recreates and populates an area in vmalloc address
+ * space from the preserved memory.
+ * @preservation: physical address of the preservation metadata.
+ *
+ * Recreates an area in vmalloc address space and populates it with memory that
+ * was preserved using kho_preserve_vmalloc().
+ *
+ * Return: pointer to the area in the vmalloc address space, NULL on failure.
+ */
+void *kho_restore_vmalloc(phys_addr_t preservation)
+{
+ struct kho_vmalloc_chunk *chunk = phys_to_virt(preservation);
+ unsigned int align, order, shift, flags;
+ unsigned int idx = 0, nr;
+ unsigned long addr, size;
+ struct vm_struct *area;
+ struct page **pages;
+ int err;
+
+ flags = chunk->hdr.flags;
+ if (flags & ~KHO_VMALLOC_FLAGS_MASK)
+ return NULL;
+
+ nr = chunk->hdr.total_pages;
+ pages = kvmalloc_array(nr, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return NULL;
+ order = chunk->hdr.order;
+ shift = PAGE_SHIFT + order;
+ align = 1 << shift;
+
+ while (chunk) {
+ struct page *page;
+
+ for (int i = 0; i < chunk->hdr.num_elms; i++) {
+ phys_addr_t phys = chunk->phys[i];
+
+ for (int j = 0; j < (1 << order); j++) {
+ page = phys_to_page(phys);
+ kho_restore_page(page, 0);
+ pages[idx++] = page;
+ phys += PAGE_SIZE;
+ }
+ }
+
+ page = virt_to_page(chunk);
+ chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
+ kho_restore_page(page, 0);
+ __free_page(page);
+ }
+
+ area = __get_vm_area_node(nr * PAGE_SIZE, align, shift, flags,
+ VMALLOC_START, VMALLOC_END, NUMA_NO_NODE,
+ GFP_KERNEL, __builtin_return_address(0));
+ if (!area)
+ goto err_free_pages_array;
+
+ addr = (unsigned long)area->addr;
+ size = get_vm_area_size(area);
+ err = vmap_pages_range(addr, addr + size, PAGE_KERNEL, pages, shift);
+ if (err)
+ goto err_free_vm_area;
+
+ return area->addr;
+
+err_free_vm_area:
+ free_vm_area(area);
+err_free_pages_array:
+ kvfree(pages);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
+
/* Handling for debug/kho/out */
static struct dentry *debugfs_root;
--
2.50.1
next prev parent reply other threads:[~2025-09-05 16:42 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-05 13:13 [PATCH v2 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
2025-09-05 13:13 ` Mike Rapoport [this message]
2025-09-05 13:13 ` [PATCH v2 2/2] lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt Mike Rapoport
-- strict thread matches above, loose matches on Subject: below --
2025-09-07 7:00 [PATCH v2 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
2025-09-07 7:00 ` [PATCH v2 1/2] " Mike Rapoport
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250905131302.3595582-2-rppt@kernel.org \
--to=rppt@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=bhe@redhat.com \
--cc=changyuanl@google.com \
--cc=chrisl@kernel.org \
--cc=graf@amazon.com \
--cc=jgg@nvidia.com \
--cc=kexec@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=pasha.tatashin@soleen.com \
--cc=pratyush@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.