* [PATCH v2 1/2] kho: add support for preserving vmalloc allocations
2025-09-07 7:00 [PATCH v2 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
@ 2025-09-07 7:00 ` Mike Rapoport
2025-09-07 7:00 ` [PATCH v2 2/2] lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt Mike Rapoport
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Mike Rapoport @ 2025-09-07 7:00 UTC (permalink / raw)
To: Andrew Morton
Cc: Alexander Graf, Baoquan He, Changyuan Lyu, Chris Li,
Jason Gunthorpe, Mike Rapoport, Pasha Tatashin, Pratyush Yadav,
kexec, linux-mm, linux-kernel
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
A vmalloc allocation is preserved using a binary structure similar to
the global KHO memory tracker. It's a linked list of pages where each
page is an array of physical addresses of the pages in the vmalloc area.
kho_preserve_vmalloc() hands out the physical address of the head page
to the caller. This address is used as the argument to
kho_restore_vmalloc() to restore the mapping in the vmalloc address
space and populate it with the preserved pages.
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
include/linux/kexec_handover.h | 12 ++
kernel/kexec_handover.c | 200 +++++++++++++++++++++++++++++++++
2 files changed, 212 insertions(+)
diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 348844cffb13..b7bf3bf11019 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -42,8 +42,10 @@ struct kho_serialization;
bool kho_is_enabled(void);
int kho_preserve_folio(struct folio *folio);
+int kho_preserve_vmalloc(void *ptr, phys_addr_t *preservation);
int kho_preserve_phys(phys_addr_t phys, size_t size);
struct folio *kho_restore_folio(phys_addr_t phys);
+void *kho_restore_vmalloc(phys_addr_t preservation);
int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
@@ -70,11 +72,21 @@ static inline int kho_preserve_phys(phys_addr_t phys, size_t size)
return -EOPNOTSUPP;
}
+static inline int kho_preserve_vmalloc(void *ptr, phys_addr_t *preservation)
+{
+ return -EOPNOTSUPP;
+}
+
static inline struct folio *kho_restore_folio(phys_addr_t phys)
{
return NULL;
}
+static inline void *kho_restore_vmalloc(phys_addr_t preservation)
+{
+ return NULL;
+}
+
static inline int kho_add_subtree(struct kho_serialization *ser,
const char *name, void *fdt)
{
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index ecd1ac210dbd..c4560ff9b1fc 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -18,6 +18,7 @@
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/page-isolation.h>
+#include <linux/vmalloc.h>
#include <asm/early_ioremap.h>
@@ -733,6 +734,205 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
}
EXPORT_SYMBOL_GPL(kho_preserve_phys);
+struct kho_vmalloc_chunk;
+
+struct kho_vmalloc_hdr {
+ DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *);
+ unsigned int total_pages; /* only valid in the first chunk */
+ unsigned int flags; /* only valid in the first chunk */
+ unsigned short order; /* only valid in the first chunk */
+ unsigned short num_elms;
+};
+
+#define KHO_VMALLOC_SIZE \
+ ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \
+ sizeof(phys_addr_t))
+
+struct kho_vmalloc_chunk {
+ struct kho_vmalloc_hdr hdr;
+ phys_addr_t phys[KHO_VMALLOC_SIZE];
+};
+
+static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE);
+
+#define KHO_VMALLOC_FLAGS_MASK (VM_ALLOC | VM_ALLOW_HUGE_VMAP)
+
+static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur)
+{
+ struct kho_vmalloc_chunk *chunk;
+ int err;
+
+ chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!chunk)
+ return NULL;
+
+ err = kho_preserve_phys(virt_to_phys(chunk), PAGE_SIZE);
+ if (err)
+ goto err_free;
+ if (cur)
+ KHOSER_STORE_PTR(cur->hdr.next, chunk);
+ return chunk;
+
+err_free:
+ kfree(chunk);
+ return NULL;
+}
+
+static void kho_vmalloc_free_chunks(struct kho_vmalloc_chunk *first_chunk)
+{
+ struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_vmalloc_chunk *chunk = first_chunk;
+
+ while (chunk) {
+ unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
+ struct kho_vmalloc_chunk *tmp = chunk;
+
+ __kho_unpreserve(track, pfn, pfn + 1);
+
+ chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
+ kfree(tmp);
+ }
+}
+
+/**
+ * kho_preserve_vmalloc - preserve memory allocated with vmalloc() across kexec
+ * @ptr: pointer to the area in vmalloc address space
+ * @preservation: returned physical address of preservation metadata
+ *
+ * Instructs KHO to preserve the area in vmalloc address space at @ptr. The
+ * physical pages mapped at @ptr will be preserved and on successful return
+ * @preservation will hold the physical address of a structure that describes
+ * the preservation.
+ *
+ * NOTE: The memory allocated with vmalloc_node() variants cannot be reliably
+ * restored on the same node
+ *
+ * Return: 0 on success, error code on failure
+ */
+int kho_preserve_vmalloc(void *ptr, phys_addr_t *preservation)
+{
+ struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_vmalloc_chunk *chunk, *first_chunk;
+ struct vm_struct *vm = find_vm_area(ptr);
+ unsigned int order, flags;
+ int err;
+
+ if (!vm)
+ return -EINVAL;
+
+ if (vm->flags & ~KHO_VMALLOC_FLAGS_MASK)
+ return -EOPNOTSUPP;
+
+ flags = vm->flags & KHO_VMALLOC_FLAGS_MASK;
+ order = get_vm_area_page_order(vm);
+
+ chunk = new_vmalloc_chunk(NULL);
+ if (!chunk)
+ return -ENOMEM;
+ first_chunk = chunk;
+ first_chunk->hdr.total_pages = vm->nr_pages;
+ first_chunk->hdr.flags = flags;
+ first_chunk->hdr.order = order;
+
+ for (int i = 0; i < vm->nr_pages; i += (1 << order)) {
+ phys_addr_t phys = page_to_phys(vm->pages[i]);
+
+ err = __kho_preserve_order(track, PHYS_PFN(phys), order);
+ if (err)
+ goto err_free;
+
+ chunk->phys[chunk->hdr.num_elms] = phys;
+ chunk->hdr.num_elms++;
+ if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->phys)) {
+ chunk = new_vmalloc_chunk(chunk);
+ if (!chunk)
+ goto err_free;
+ }
+ }
+
+ *preservation = virt_to_phys(first_chunk);
+ return 0;
+
+err_free:
+ kho_vmalloc_free_chunks(first_chunk);
+ return err;
+}
+EXPORT_SYMBOL_GPL(kho_preserve_vmalloc);
+
+/**
+ * kho_restore_vmalloc - recreates and populates an area in vmalloc address
+ * space from the preserved memory.
+ * @preservation: physical address of the preservation metadata.
+ *
+ * Recreates an area in vmalloc address space and populates it with memory that
+ * was preserved using kho_preserve_vmalloc().
+ *
+ * Return: pointer to the area in the vmalloc address space, NULL on failure.
+ */
+void *kho_restore_vmalloc(phys_addr_t preservation)
+{
+ struct kho_vmalloc_chunk *chunk = phys_to_virt(preservation);
+ unsigned int align, order, shift, flags;
+ unsigned int idx = 0, nr;
+ unsigned long addr, size;
+ struct vm_struct *area;
+ struct page **pages;
+ int err;
+
+ flags = chunk->hdr.flags;
+ if (flags & ~KHO_VMALLOC_FLAGS_MASK)
+ return NULL;
+
+ nr = chunk->hdr.total_pages;
+ pages = kvmalloc_array(nr, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return NULL;
+ order = chunk->hdr.order;
+ shift = PAGE_SHIFT + order;
+ align = 1 << shift;
+
+ while (chunk) {
+ struct page *page;
+
+ for (int i = 0; i < chunk->hdr.num_elms; i++) {
+ phys_addr_t phys = chunk->phys[i];
+
+ for (int j = 0; j < (1 << order); j++) {
+ page = phys_to_page(phys);
+ kho_restore_page(page, 0);
+ pages[idx++] = page;
+ phys += PAGE_SIZE;
+ }
+ }
+
+ page = virt_to_page(chunk);
+ chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
+ kho_restore_page(page, 0);
+ __free_page(page);
+ }
+
+ area = __get_vm_area_node(nr * PAGE_SIZE, align, shift, flags,
+ VMALLOC_START, VMALLOC_END, NUMA_NO_NODE,
+ GFP_KERNEL, __builtin_return_address(0));
+ if (!area)
+ goto err_free_pages_array;
+
+ addr = (unsigned long)area->addr;
+ size = get_vm_area_size(area);
+ err = vmap_pages_range(addr, addr + size, PAGE_KERNEL, pages, shift);
+ if (err)
+ goto err_free_vm_area;
+
+ return area->addr;
+
+err_free_vm_area:
+ free_vm_area(area);
+err_free_pages_array:
+ kvfree(pages);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
+
/* Handling for debug/kho/out */
static struct dentry *debugfs_root;
--
2.50.1
^ permalink raw reply related [flat|nested] 8+ messages in thread

* [PATCH v2 2/2] lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt
2025-09-07 7:00 [PATCH v2 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
2025-09-07 7:00 ` [PATCH v2 1/2] " Mike Rapoport
@ 2025-09-07 7:00 ` Mike Rapoport
2025-09-07 7:00 ` [PATCH v3 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Mike Rapoport @ 2025-09-07 7:00 UTC (permalink / raw)
To: Andrew Morton
Cc: Alexander Graf, Baoquan He, Changyuan Lyu, Chris Li,
Jason Gunthorpe, Mike Rapoport, Pasha Tatashin, Pratyush Yadav,
kexec, linux-mm, linux-kernel
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
KHO test stores physical addresses of the preserved folios directly in
fdt.
Use kho_preserve_vmalloc() instead, and use kho_restore_vmalloc() to
retrieve the addresses after kexec.
This makes the test more scalable on the one hand and adds test coverage
for kho_preserve_vmalloc() on the other.
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
lib/test_kho.c | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/lib/test_kho.c b/lib/test_kho.c
index c2eb899c3b45..10045f5979a0 100644
--- a/lib/test_kho.c
+++ b/lib/test_kho.c
@@ -32,6 +32,7 @@ module_param(max_mem, long, 0644);
struct kho_test_state {
unsigned int nr_folios;
struct folio **folios;
+ phys_addr_t *folios_info;
struct folio *fdt;
__wsum csum;
};
@@ -68,13 +69,17 @@ static struct notifier_block kho_test_nb = {
static int kho_test_save_data(struct kho_test_state *state, void *fdt)
{
phys_addr_t *folios_info __free(kvfree) = NULL;
+ phys_addr_t folios_info_phys;
int err = 0;
- folios_info = kvmalloc_array(state->nr_folios, sizeof(*folios_info),
- GFP_KERNEL);
+ folios_info = vmalloc_array(state->nr_folios, sizeof(*folios_info));
if (!folios_info)
return -ENOMEM;
+ err = kho_preserve_vmalloc(folios_info, &folios_info_phys);
+ if (err)
+ return err;
+
for (int i = 0; i < state->nr_folios; i++) {
struct folio *folio = state->folios[i];
unsigned int order = folio_order(folio);
@@ -89,11 +94,14 @@ static int kho_test_save_data(struct kho_test_state *state, void *fdt)
err |= fdt_begin_node(fdt, "data");
err |= fdt_property(fdt, "nr_folios", &state->nr_folios,
sizeof(state->nr_folios));
- err |= fdt_property(fdt, "folios_info", folios_info,
- state->nr_folios * sizeof(*folios_info));
+ err |= fdt_property(fdt, "folios_info", &folios_info_phys,
+ sizeof(folios_info_phys));
err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum));
err |= fdt_end_node(fdt);
+ if (!err)
+ state->folios_info = no_free_ptr(folios_info);
+
return err;
}
@@ -197,7 +205,8 @@ static int kho_test_save(void)
static int kho_test_restore_data(const void *fdt, int node)
{
const unsigned int *nr_folios;
- const phys_addr_t *folios_info;
+ const phys_addr_t *folios_info_phys;
+ phys_addr_t *folios_info;
const __wsum *old_csum;
__wsum csum = 0;
int len;
@@ -212,8 +221,12 @@ static int kho_test_restore_data(const void *fdt, int node)
if (!old_csum || len != sizeof(*old_csum))
return -EINVAL;
- folios_info = fdt_getprop(fdt, node, "folios_info", &len);
- if (!folios_info || len != sizeof(*folios_info) * *nr_folios)
+ folios_info_phys = fdt_getprop(fdt, node, "folios_info", &len);
+ if (!folios_info_phys || len != sizeof(*folios_info_phys))
+ return -EINVAL;
+
+ folios_info = kho_restore_vmalloc(*folios_info_phys);
+ if (!folios_info)
return -EINVAL;
for (int i = 0; i < *nr_folios; i++) {
@@ -233,6 +246,8 @@ static int kho_test_restore_data(const void *fdt, int node)
folio_put(folio);
}
+ vfree(folios_info);
+
if (csum != *old_csum)
return -EINVAL;
@@ -291,6 +306,7 @@ static void kho_test_cleanup(void)
folio_put(kho_test_state.folios[i]);
kvfree(kho_test_state.folios);
+ vfree(kho_test_state.folios_info);
}
static void __exit kho_test_exit(void)
--
2.50.1
^ permalink raw reply related [flat|nested] 8+ messages in thread

* [PATCH v3 0/2] kho: add support for preserving vmalloc allocations
2025-09-07 7:00 [PATCH v2 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
2025-09-07 7:00 ` [PATCH v2 1/2] " Mike Rapoport
2025-09-07 7:00 ` [PATCH v2 2/2] lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt Mike Rapoport
@ 2025-09-07 7:00 ` Mike Rapoport
2025-09-07 7:00 ` [PATCH v3 1/2] " Mike Rapoport
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Mike Rapoport @ 2025-09-07 7:00 UTC (permalink / raw)
To: Andrew Morton
Cc: Alexander Graf, Baoquan He, Changyuan Lyu, Chris Li,
Jason Gunthorpe, Mike Rapoport, Pasha Tatashin, Pratyush Yadav,
kexec, linux-mm, linux-kernel
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
Hi,
Following the discussion about preservation of memfd with LUO [1] these
patches add support for preserving vmalloc allocations.
Any KHO use case presumes that there's a data structure that lists
physical addresses of preserved folios (and potentially some additional
metadata). Allowing vmalloc preservations with KHO allows scalable
preservation of such data structures.
For instance, instead of allocating array describing preserved folios in
the fdt, memfd preservation can use vmalloc:
preserved_folios = vmalloc_array(nr_folios, sizeof(*preserved_folios));
memfd_luo_preserve_folios(preserved_folios, folios, nr_folios);
kho_preserve_vmalloc(preserved_folios, &folios_info);
[1] https://lore.kernel.org/all/20250807014442.3829950-30-pasha.tatashin@soleen.com
v3 changes:
* rebase on mm-unstable
v2: https://lore.kernel.org/all/20250905131302.3595582-1-rppt@kernel.org
* support preservation of vmalloc backed by large pages
* add check for supported vmalloc flags and preserve the flags to be
able to identify incompatible preservations
* don't use kho_preserve_phys()
* add kernel-doc
v1: https://lore.kernel.org/all/20250903063018.3346652-1-rppt@kernel.org
Mike Rapoport (Microsoft) (2):
kho: add support for preserving vmalloc allocations
lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt
include/linux/kexec_handover.h | 12 ++
kernel/kexec_handover.c | 200 +++++++++++++++++++++++++++++++++
lib/test_kho.c | 41 +++++--
3 files changed, 241 insertions(+), 12 deletions(-)
base-commit: b024763926d2726978dff6588b81877d000159c1
--
2.50.1
^ permalink raw reply [flat|nested] 8+ messages in thread

* [PATCH v3 1/2] kho: add support for preserving vmalloc allocations
2025-09-07 7:00 [PATCH v2 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
` (2 preceding siblings ...)
2025-09-07 7:00 ` [PATCH v3 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
@ 2025-09-07 7:00 ` Mike Rapoport
2025-09-07 7:00 ` [PATCH v3 2/2] lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt Mike Rapoport
2025-09-08 10:34 ` [PATCH v2 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
5 siblings, 0 replies; 8+ messages in thread
From: Mike Rapoport @ 2025-09-07 7:00 UTC (permalink / raw)
To: Andrew Morton
Cc: Alexander Graf, Baoquan He, Changyuan Lyu, Chris Li,
Jason Gunthorpe, Mike Rapoport, Pasha Tatashin, Pratyush Yadav,
kexec, linux-mm, linux-kernel
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
A vmalloc allocation is preserved using a binary structure similar to
the global KHO memory tracker. It's a linked list of pages where each
page is an array of physical addresses of the pages in the vmalloc area.
kho_preserve_vmalloc() hands out the physical address of the head page
to the caller. This address is used as the argument to
kho_restore_vmalloc() to restore the mapping in the vmalloc address
space and populate it with the preserved pages.
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
include/linux/kexec_handover.h | 12 ++
kernel/kexec_handover.c | 200 +++++++++++++++++++++++++++++++++
2 files changed, 212 insertions(+)
diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index 348844cffb13..b7bf3bf11019 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -42,8 +42,10 @@ struct kho_serialization;
bool kho_is_enabled(void);
int kho_preserve_folio(struct folio *folio);
+int kho_preserve_vmalloc(void *ptr, phys_addr_t *preservation);
int kho_preserve_phys(phys_addr_t phys, size_t size);
struct folio *kho_restore_folio(phys_addr_t phys);
+void *kho_restore_vmalloc(phys_addr_t preservation);
int kho_add_subtree(struct kho_serialization *ser, const char *name, void *fdt);
int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
@@ -70,11 +72,21 @@ static inline int kho_preserve_phys(phys_addr_t phys, size_t size)
return -EOPNOTSUPP;
}
+static inline int kho_preserve_vmalloc(void *ptr, phys_addr_t *preservation)
+{
+ return -EOPNOTSUPP;
+}
+
static inline struct folio *kho_restore_folio(phys_addr_t phys)
{
return NULL;
}
+static inline void *kho_restore_vmalloc(phys_addr_t preservation)
+{
+ return NULL;
+}
+
static inline int kho_add_subtree(struct kho_serialization *ser,
const char *name, void *fdt)
{
diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c
index 8079fc4b9189..1177cc5ffa1a 100644
--- a/kernel/kexec_handover.c
+++ b/kernel/kexec_handover.c
@@ -18,6 +18,7 @@
#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/page-isolation.h>
+#include <linux/vmalloc.h>
#include <asm/early_ioremap.h>
@@ -742,6 +743,205 @@ int kho_preserve_phys(phys_addr_t phys, size_t size)
}
EXPORT_SYMBOL_GPL(kho_preserve_phys);
+struct kho_vmalloc_chunk;
+
+struct kho_vmalloc_hdr {
+ DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *);
+ unsigned int total_pages; /* only valid in the first chunk */
+ unsigned int flags; /* only valid in the first chunk */
+ unsigned short order; /* only valid in the first chunk */
+ unsigned short num_elms;
+};
+
+#define KHO_VMALLOC_SIZE \
+ ((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / \
+ sizeof(phys_addr_t))
+
+struct kho_vmalloc_chunk {
+ struct kho_vmalloc_hdr hdr;
+ phys_addr_t phys[KHO_VMALLOC_SIZE];
+};
+
+static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE);
+
+#define KHO_VMALLOC_FLAGS_MASK (VM_ALLOC | VM_ALLOW_HUGE_VMAP)
+
+static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur)
+{
+ struct kho_vmalloc_chunk *chunk;
+ int err;
+
+ chunk = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!chunk)
+ return NULL;
+
+ err = kho_preserve_phys(virt_to_phys(chunk), PAGE_SIZE);
+ if (err)
+ goto err_free;
+ if (cur)
+ KHOSER_STORE_PTR(cur->hdr.next, chunk);
+ return chunk;
+
+err_free:
+ kfree(chunk);
+ return NULL;
+}
+
+static void kho_vmalloc_free_chunks(struct kho_vmalloc_chunk *first_chunk)
+{
+ struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_vmalloc_chunk *chunk = first_chunk;
+
+ while (chunk) {
+ unsigned long pfn = PHYS_PFN(virt_to_phys(chunk));
+ struct kho_vmalloc_chunk *tmp = chunk;
+
+ __kho_unpreserve(track, pfn, pfn + 1);
+
+ chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
+ kfree(tmp);
+ }
+}
+
+/**
+ * kho_preserve_vmalloc - preserve memory allocated with vmalloc() across kexec
+ * @ptr: pointer to the area in vmalloc address space
+ * @preservation: returned physical address of preservation metadata
+ *
+ * Instructs KHO to preserve the area in vmalloc address space at @ptr. The
+ * physical pages mapped at @ptr will be preserved and on successful return
+ * @preservation will hold the physical address of a structure that describes
+ * the preservation.
+ *
+ * NOTE: The memory allocated with vmalloc_node() variants cannot be reliably
+ * restored on the same node
+ *
+ * Return: 0 on success, error code on failure
+ */
+int kho_preserve_vmalloc(void *ptr, phys_addr_t *preservation)
+{
+ struct kho_mem_track *track = &kho_out.ser.track;
+ struct kho_vmalloc_chunk *chunk, *first_chunk;
+ struct vm_struct *vm = find_vm_area(ptr);
+ unsigned int order, flags;
+ int err;
+
+ if (!vm)
+ return -EINVAL;
+
+ if (vm->flags & ~KHO_VMALLOC_FLAGS_MASK)
+ return -EOPNOTSUPP;
+
+ flags = vm->flags & KHO_VMALLOC_FLAGS_MASK;
+ order = get_vm_area_page_order(vm);
+
+ chunk = new_vmalloc_chunk(NULL);
+ if (!chunk)
+ return -ENOMEM;
+ first_chunk = chunk;
+ first_chunk->hdr.total_pages = vm->nr_pages;
+ first_chunk->hdr.flags = flags;
+ first_chunk->hdr.order = order;
+
+ for (int i = 0; i < vm->nr_pages; i += (1 << order)) {
+ phys_addr_t phys = page_to_phys(vm->pages[i]);
+
+ err = __kho_preserve_order(track, PHYS_PFN(phys), order);
+ if (err)
+ goto err_free;
+
+ chunk->phys[chunk->hdr.num_elms] = phys;
+ chunk->hdr.num_elms++;
+ if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->phys)) {
+ chunk = new_vmalloc_chunk(chunk);
+ if (!chunk)
+ goto err_free;
+ }
+ }
+
+ *preservation = virt_to_phys(first_chunk);
+ return 0;
+
+err_free:
+ kho_vmalloc_free_chunks(first_chunk);
+ return err;
+}
+EXPORT_SYMBOL_GPL(kho_preserve_vmalloc);
+
+/**
+ * kho_restore_vmalloc - recreates and populates an area in vmalloc address
+ * space from the preserved memory.
+ * @preservation: physical address of the preservation metadata.
+ *
+ * Recreates an area in vmalloc address space and populates it with memory that
+ * was preserved using kho_preserve_vmalloc().
+ *
+ * Return: pointer to the area in the vmalloc address space, NULL on failure.
+ */
+void *kho_restore_vmalloc(phys_addr_t preservation)
+{
+ struct kho_vmalloc_chunk *chunk = phys_to_virt(preservation);
+ unsigned int align, order, shift, flags;
+ unsigned int idx = 0, nr;
+ unsigned long addr, size;
+ struct vm_struct *area;
+ struct page **pages;
+ int err;
+
+ flags = chunk->hdr.flags;
+ if (flags & ~KHO_VMALLOC_FLAGS_MASK)
+ return NULL;
+
+ nr = chunk->hdr.total_pages;
+ pages = kvmalloc_array(nr, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return NULL;
+ order = chunk->hdr.order;
+ shift = PAGE_SHIFT + order;
+ align = 1 << shift;
+
+ while (chunk) {
+ struct page *page;
+
+ for (int i = 0; i < chunk->hdr.num_elms; i++) {
+ phys_addr_t phys = chunk->phys[i];
+
+ for (int j = 0; j < (1 << order); j++) {
+ page = phys_to_page(phys);
+ kho_restore_page(page, 0);
+ pages[idx++] = page;
+ phys += PAGE_SIZE;
+ }
+ }
+
+ page = virt_to_page(chunk);
+ chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
+ kho_restore_page(page, 0);
+ __free_page(page);
+ }
+
+ area = __get_vm_area_node(nr * PAGE_SIZE, align, shift, flags,
+ VMALLOC_START, VMALLOC_END, NUMA_NO_NODE,
+ GFP_KERNEL, __builtin_return_address(0));
+ if (!area)
+ goto err_free_pages_array;
+
+ addr = (unsigned long)area->addr;
+ size = get_vm_area_size(area);
+ err = vmap_pages_range(addr, addr + size, PAGE_KERNEL, pages, shift);
+ if (err)
+ goto err_free_vm_area;
+
+ return area->addr;
+
+err_free_vm_area:
+ free_vm_area(area);
+err_free_pages_array:
+ kvfree(pages);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(kho_restore_vmalloc);
+
/* Handling for debug/kho/out */
static struct dentry *debugfs_root;
--
2.50.1
^ permalink raw reply related [flat|nested] 8+ messages in thread

* [PATCH v3 2/2] lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt
2025-09-07 7:00 [PATCH v2 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
` (3 preceding siblings ...)
2025-09-07 7:00 ` [PATCH v3 1/2] " Mike Rapoport
@ 2025-09-07 7:00 ` Mike Rapoport
2025-09-08 10:34 ` [PATCH v2 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
5 siblings, 0 replies; 8+ messages in thread
From: Mike Rapoport @ 2025-09-07 7:00 UTC (permalink / raw)
To: Andrew Morton
Cc: Alexander Graf, Baoquan He, Changyuan Lyu, Chris Li,
Jason Gunthorpe, Mike Rapoport, Pasha Tatashin, Pratyush Yadav,
kexec, linux-mm, linux-kernel
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
KHO test stores physical addresses of the preserved folios directly in
fdt.
Use kho_preserve_vmalloc() instead, and use kho_restore_vmalloc() to
retrieve the addresses after kexec.
This makes the test more scalable on the one hand and adds test coverage
for kho_preserve_vmalloc() on the other.
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
lib/test_kho.c | 41 +++++++++++++++++++++++++++++------------
1 file changed, 29 insertions(+), 12 deletions(-)
diff --git a/lib/test_kho.c b/lib/test_kho.c
index fe8504e3407b..c46f577b6aee 100644
--- a/lib/test_kho.c
+++ b/lib/test_kho.c
@@ -32,6 +32,7 @@ module_param(max_mem, long, 0644);
struct kho_test_state {
unsigned int nr_folios;
struct folio **folios;
+ phys_addr_t *folios_info;
struct folio *fdt;
__wsum csum;
};
@@ -67,18 +68,15 @@ static struct notifier_block kho_test_nb = {
static int kho_test_save_data(struct kho_test_state *state, void *fdt)
{
- phys_addr_t *folios_info;
+ phys_addr_t *folios_info __free(kvfree) = NULL;
+ phys_addr_t folios_info_phys;
int err = 0;
- err |= fdt_begin_node(fdt, "data");
- err |= fdt_property(fdt, "nr_folios", &state->nr_folios,
- sizeof(state->nr_folios));
- err |= fdt_property_placeholder(fdt, "folios_info",
- state->nr_folios * sizeof(*folios_info),
- (void **)&folios_info);
- err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum));
- err |= fdt_end_node(fdt);
+ folios_info = vmalloc_array(state->nr_folios, sizeof(*folios_info));
+ if (!folios_info)
+ return -ENOMEM;
+ err = kho_preserve_vmalloc(folios_info, &folios_info_phys);
if (err)
return err;
@@ -93,6 +91,17 @@ static int kho_test_save_data(struct kho_test_state *state, void *fdt)
break;
}
+ err |= fdt_begin_node(fdt, "data");
+ err |= fdt_property(fdt, "nr_folios", &state->nr_folios,
+ sizeof(state->nr_folios));
+ err |= fdt_property(fdt, "folios_info", &folios_info_phys,
+ sizeof(folios_info_phys));
+ err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum));
+ err |= fdt_end_node(fdt);
+
+ if (!err)
+ state->folios_info = no_free_ptr(folios_info);
+
return err;
}
@@ -210,7 +219,8 @@ static int kho_test_save(void)
static int kho_test_restore_data(const void *fdt, int node)
{
const unsigned int *nr_folios;
- const phys_addr_t *folios_info;
+ const phys_addr_t *folios_info_phys;
+ phys_addr_t *folios_info;
const __wsum *old_csum;
__wsum csum = 0;
int len;
@@ -225,8 +235,12 @@ static int kho_test_restore_data(const void *fdt, int node)
if (!old_csum || len != sizeof(*old_csum))
return -EINVAL;
- folios_info = fdt_getprop(fdt, node, "folios_info", &len);
- if (!folios_info || len != sizeof(*folios_info) * *nr_folios)
+ folios_info_phys = fdt_getprop(fdt, node, "folios_info", &len);
+ if (!folios_info_phys || len != sizeof(*folios_info_phys))
+ return -EINVAL;
+
+ folios_info = kho_restore_vmalloc(*folios_info_phys);
+ if (!folios_info)
return -EINVAL;
for (int i = 0; i < *nr_folios; i++) {
@@ -246,6 +260,8 @@ static int kho_test_restore_data(const void *fdt, int node)
folio_put(folio);
}
+ vfree(folios_info);
+
if (csum != *old_csum)
return -EINVAL;
@@ -304,6 +320,7 @@ static void kho_test_cleanup(void)
folio_put(kho_test_state.folios[i]);
kvfree(kho_test_state.folios);
+ vfree(kho_test_state.folios_info);
folio_put(kho_test_state.fdt);
}
--
2.50.1
^ permalink raw reply related [flat|nested] 8+ messages in thread

* Re: [PATCH v2 0/2] kho: add support for preserving vmalloc allocations
2025-09-07 7:00 [PATCH v2 0/2] kho: add support for preserving vmalloc allocations Mike Rapoport
` (4 preceding siblings ...)
2025-09-07 7:00 ` [PATCH v3 2/2] lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt Mike Rapoport
@ 2025-09-08 10:34 ` Mike Rapoport
5 siblings, 0 replies; 8+ messages in thread
From: Mike Rapoport @ 2025-09-08 10:34 UTC (permalink / raw)
To: Andrew Morton
Cc: Alexander Graf, Baoquan He, Changyuan Lyu, Chris Li,
Jason Gunthorpe, Pasha Tatashin, Pratyush Yadav, kexec, linux-mm,
linux-kernel
Argh, I've messed up the posting :(
Sorry for the noise.
On Sun, Sep 07, 2025 at 10:00:17AM +0300, Mike Rapoport wrote:
> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>
> Hi,
>
> Following the discussion about preservation of memfd with LUO [1] these
> patches add support for preserving vmalloc allocations.
>
> Any KHO uses case presumes that there's a data structure that lists
> physical addresses of preserved folios (and potentially some additional
> metadata). Allowing vmalloc preservations with KHO allows scalable
> preservation of such data structures.
>
> For instance, instead of allocating array describing preserved folios in
> the fdt, memfd preservation can use vmalloc:
>
> preserved_folios = vmalloc_array(nr_folios, sizeof(*preserved_folios));
> memfd_luo_preserve_folios(preserved_folios, folios, nr_folios);
> kho_preserve_vmalloc(preserved_folios, &folios_info);
>
> [1] https://lore.kernel.org/all/20250807014442.3829950-30-pasha.tatashin@soleen.com
>
> v2 changes:
> * support preservation of vmalloc backed by large pages
> * add check for supported vmalloc flags and preserve the flags to be
> able to identify incompatible preservations
> * don't use kho_preserve_phys()
> * add kernel-doc
>
> v1: https://lore.kernel.org/all/20250903063018.3346652-1-rppt@kernel.org
>
> Mike Rapoport (Microsoft) (2):
> kho: add support for preserving vmalloc allocations
> lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt
>
> include/linux/kexec_handover.h | 12 ++
> kernel/kexec_handover.c | 200 +++++++++++++++++++++++++++++++++
> lib/test_kho.c | 30 +++--
> 3 files changed, 235 insertions(+), 7 deletions(-)
>
>
> base-commit: b320789d6883cc00ac78ce83bccbfe7ed58afcf0
> --
> 2.50.1
>
--
Sincerely yours,
Mike.
^ permalink raw reply [flat|nested] 8+ messages in thread