From: Ard Biesheuvel <ardb+git@google.com>
To: linux-kernel@vger.kernel.org
Cc: linux-efi@vger.kernel.org, x86@kernel.org,
Ard Biesheuvel <ardb@kernel.org>,
"Mike Rapoport (Microsoft)" <rppt@kernel.org>,
Benjamin Herrenschmidt <benh@kernel.crashing.org>
Subject: [RFC PATCH 8/9] x86/efi: Defer compaction of the EFI memory map
Date: Fri, 6 Mar 2026 16:57:13 +0100 [thread overview]
Message-ID: <20260306155703.815272-21-ardb+git@google.com> (raw)
In-Reply-To: <20260306155703.815272-12-ardb+git@google.com>
From: Ard Biesheuvel <ardb@kernel.org>
Currently, the EFI memory map is compacted early at boot, to leave only
the entries that are significant to the current kernel or potentially a
kexec'ed kernel that comes after, and to suppress all boot services code
and data entries that have no correspondence with anything that either
the firmware or the kernel treats as reserved for firmware use.
Given that those regions cannot actually be freed to the page allocator
yet at that point, the suppressed entries are converted into yet another
type of temporary memory reservation map, and the regions they describe
are freed during an arch_initcall(), which is the earliest convenient
time to actually perform this operation.
Given that compacting the memory map does not need to occur that early
to begin with, move it to the arch_initcall(). This removes the need for
the special memory reservation map, as the entries still exist at that
point and can be consulted directly to decide whether they need to be
preserved in their entirety or only partially.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/x86/platform/efi/quirks.c | 130 +++++++-------------
1 file changed, 46 insertions(+), 84 deletions(-)
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 5bf97376c1a0..d7a64b404bea 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -350,37 +350,10 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
pr_err("Failed to unmap VA mapping for 0x%llx\n", va);
}
-struct efi_freeable_range {
- u64 start;
- u64 end;
-};
-
-static struct efi_freeable_range *ranges_to_free;
-
void __init efi_unmap_boot_services(void)
{
- struct efi_memory_map_data data = {
- .phys_map = efi.memmap.phys_map,
- .desc_version = efi.memmap.desc_version,
- .desc_size = efi.memmap.desc_size,
- };
efi_memory_desc_t *md;
- void *new_md;
- int idx = 0;
- size_t sz;
- /* Keep all regions for /sys/kernel/debug/efi */
- if (efi_enabled(EFI_DBG))
- return;
-
- sz = sizeof(*ranges_to_free) * efi.memmap.nr_map + 1;
- ranges_to_free = kzalloc(sz, GFP_KERNEL);
- if (!ranges_to_free) {
- pr_err("Failed to allocate storage for freeable EFI regions\n");
- return;
- }
-
- new_md = efi.memmap.map;
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -394,15 +367,10 @@ void __init efi_unmap_boot_services(void)
/*
* Before calling set_virtual_address_map(), EFI boot services
* code/data regions were mapped as a quirk for buggy firmware.
- * Unmap them from efi_pgd before freeing them up.
+ * Unmap them from efi_pgd, they will be freed later.
*/
efi_unmap_pages(md);
- /* Do not free, someone else owns it: */
- if (md->attribute & EFI_MEMORY_RUNTIME) {
- continue;
- }
-
/*
* Nasty quirk: if all sub-1MB memory is used for boot
* services, we can get here without having allocated the
@@ -416,49 +384,14 @@ void __init efi_unmap_boot_services(void)
* this happened, but Linux should still try to boot rather
* panicking early.)
*/
- rm_size = real_mode_size_needed();
+ rm_size = PAGE_ALIGN(real_mode_size_needed());
if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
set_real_mode_mem(start);
- start += rm_size;
- size -= rm_size;
- }
-
- /*
- * With CONFIG_DEFERRED_STRUCT_PAGE_INIT parts of the memory
- * map are still not initialized and we can't reliably free
- * memory here.
- * Queue the ranges to free at a later point.
- */
- ranges_to_free[idx].start = start;
- ranges_to_free[idx].end = start + size;
- idx++;
- }
- /*
- * Build a new EFI memmap that excludes any boot services
- * regions that are not tagged EFI_MEMORY_RUNTIME, since those
- * regions have now been freed.
- */
- new_md = efi.memmap.map;
- for_each_efi_memory_desc(md) {
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- (md->type == EFI_BOOT_SERVICES_CODE ||
- md->type == EFI_BOOT_SERVICES_DATA) &&
- !e820__mapped_any(md->phys_addr,
- md->phys_addr + md->num_pages * EFI_PAGE_SIZE,
- E820_TYPE_RESERVED)) {
- continue;
+ /* Remove the allocated space from the descriptor */
+ md->phys_addr += rm_size;
+ md->num_pages -= rm_size / EFI_PAGE_SIZE;
}
-
- memcpy(new_md, md, efi.memmap.desc_size);
- new_md += efi.memmap.desc_size;
- }
-
- data.size = new_md - efi.memmap.map;
-
- if (efi_memmap_install(&data) != 0) {
- pr_err("Could not install new EFI memmap\n");
- return;
}
}
@@ -498,24 +431,53 @@ efi_free_unreserved_subregions(u64 range_start, u64 range_end)
static int __init efi_free_boot_services(void)
{
- struct efi_freeable_range *range = ranges_to_free;
+ struct efi_memory_map_data data = {
+ .phys_map = efi.memmap.phys_map,
+ .desc_version = efi.memmap.desc_version,
+ .desc_size = efi.memmap.desc_size,
+ };
unsigned long freed = 0;
+ efi_memory_desc_t *md;
+ void *new_md;
- if (!ranges_to_free)
+ /* Keep all regions for /sys/kernel/debug/efi */
+ if (efi_enabled(EFI_DBG))
return 0;
- while (range->start) {
- /*
- * Don't free memory under 1M for two reasons:
- * - BIOS might clobber it
- * - Crash kernel needs it to be reserved
- */
- u64 start = max(range->start, SZ_1M);
+ new_md = efi.memmap.map;
+ for_each_efi_memory_desc(md) {
+ u64 md_start = max(md->phys_addr, SZ_1M);
+ u64 md_end = md->phys_addr + md->num_pages * EFI_PAGE_SIZE;
+ bool preserve_entry = true;
+
+ if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+ (md->type == EFI_BOOT_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_DATA)) {
+ u64 f = efi_free_unreserved_subregions(md_start, md_end);
+
+ /*
+ * Omit the memory map entry of this region only if it
+ * has been freed entirely. This ensures that boot data
+ * regions for things like ESRT and BGRT tables carry
+ * over correctly during kexec.
+ */
+ if (f == md_end - md_start)
+ preserve_entry = false;
+
+ freed += f;
+ }
- freed += efi_free_unreserved_subregions(start, range->end);
- range++;
+ if (preserve_entry) {
+ if (new_md != md)
+ memcpy(new_md, md, efi.memmap.desc_size);
+ new_md += efi.memmap.desc_size;
+ }
}
- kfree(ranges_to_free);
+
+ data.size = new_md - efi.memmap.map;
+
+ if (efi_memmap_install(&data) != 0)
+ pr_err("Could not install new EFI memmap\n");
if (freed)
pr_info("Freeing EFI boot services memory: %ldK\n", freed / SZ_1K);
--
2.53.0.473.g4a7958ca14-goog
next prev parent reply other threads:[~2026-03-06 15:57 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-06 15:57 [RFC PATCH 0/9] efi/x86: Avoid the need to mangle the EFI memory map Ard Biesheuvel
2026-03-06 15:57 ` [RFC PATCH 1/9] memblock: Permit existing reserved regions to be marked RSRV_KERN Ard Biesheuvel
2026-03-16 6:53 ` Mike Rapoport
2026-03-06 15:57 ` [RFC PATCH 2/9] efi: Tag memblock reservations of boot services regions as RSRV_KERN Ard Biesheuvel
2026-03-16 6:55 ` Mike Rapoport
2026-03-06 15:57 ` [RFC PATCH 3/9] x86/efi: Omit RSRV_KERN memblock reservations when freeing boot regions Ard Biesheuvel
2026-03-06 15:57 ` [RFC PATCH 4/9] x86/efi: Defer sub-1M check from unmap to free stage Ard Biesheuvel
2026-03-06 15:57 ` [PATCH 4/4] x86/efi: Omit kernel reservations of boot services memory from memmap Ard Biesheuvel
2026-03-06 16:00 ` Ard Biesheuvel
2026-03-06 15:57 ` [RFC PATCH 5/9] x86/efi: Unmap kernel-reserved boot regions from EFI page tables Ard Biesheuvel
2026-03-06 15:57 ` [RFC PATCH 6/9] x86/efi: Do not rely on EFI_MEMORY_RUNTIME bit and avoid entry splitting Ard Biesheuvel
2026-03-06 15:57 ` [RFC PATCH 7/9] x86/efi: Reuse memory map instead of reallocating it Ard Biesheuvel
2026-03-06 15:57 ` Ard Biesheuvel [this message]
2026-03-06 15:57 ` [RFC PATCH 9/9] x86/efi: Free unused tail of the EFI memory map Ard Biesheuvel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260306155703.815272-21-ardb+git@google.com \
--to=ardb+git@google.com \
--cc=ardb@kernel.org \
--cc=benh@kernel.crashing.org \
--cc=linux-efi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=rppt@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.