From: Ard Biesheuvel <ardb+git@google.com>
To: linux-kernel@vger.kernel.org
Cc: linux-efi@vger.kernel.org, x86@kernel.org,
Ard Biesheuvel <ardb@kernel.org>,
"Mike Rapoport (Microsoft)" <rppt@kernel.org>,
Benjamin Herrenschmidt <benh@kernel.crashing.org>
Subject: [PATCH v2 17/19] x86/efi: Defer compaction of the EFI memory map
Date: Thu, 19 Mar 2026 10:05:47 +0100 [thread overview]
Message-ID: <20260319090529.1091660-38-ardb+git@google.com> (raw)
In-Reply-To: <20260319090529.1091660-21-ardb+git@google.com>
From: Ard Biesheuvel <ardb@kernel.org>
Currently, the EFI memory map is compacted early at boot, to leave only
the entries that are significant to the current kernel or potentially to
a kexec'ed kernel that comes after, and to suppress all boot services
code and data entries that do not correspond to anything that either the
firmware or the kernel treats as reserved for firmware use.
Given that actually freeing those regions to the page allocator is not
possible yet at this point, those suppressed entries are converted into
yet another type of temporary memory reservation map, and freed during
an arch_initcall(), which is the earliest convenient time to actually
perform this operation.
Given that compacting the memory map does not need to occur that early
to begin with, move it to the arch_initcall(). This removes the need for
the special memory reservation map, as the entries still exist when the
initcall runs, and can be consulted directly to decide whether they need
to be preserved in their entirety or only partially.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/x86/platform/efi/quirks.c | 110 +++++++-------------
1 file changed, 39 insertions(+), 71 deletions(-)
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index dc90c35480f8..bc9dfe7925aa 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -347,36 +347,11 @@ static void __init efi_unmap_pages(efi_memory_desc_t *md)
pr_err("Failed to unmap VA mapping for 0x%llx\n", va);
}
-struct efi_freeable_range {
- u64 start;
- u64 end;
-};
-
-static struct efi_freeable_range *ranges_to_free;
-
void __init efi_unmap_boot_services(void)
{
efi_memory_desc_t *md;
- void *new_md;
- int idx = 0;
- size_t sz;
- /* Keep all regions for /sys/kernel/debug/efi */
- if (efi_enabled(EFI_DBG))
- return;
-
- sz = sizeof(*ranges_to_free) * efi.memmap.num_valid_entries + 1;
- ranges_to_free = kzalloc(sz, GFP_KERNEL);
- if (!ranges_to_free) {
- pr_err("Failed to allocate storage for freeable EFI regions\n");
- return;
- }
-
- new_md = efi.memmap.map;
for_each_efi_memory_desc(md) {
- unsigned long long start = md->phys_addr;
- unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
-
if (md->type != EFI_BOOT_SERVICES_CODE &&
md->type != EFI_BOOT_SERVICES_DATA) {
continue;
@@ -385,47 +360,10 @@ void __init efi_unmap_boot_services(void)
/*
* Before calling set_virtual_address_map(), EFI boot services
* code/data regions were mapped as a quirk for buggy firmware.
- * Unmap them from efi_pgd before freeing them up.
+ * Unmap them from efi_pgd, they will be freed later.
*/
efi_unmap_pages(md);
-
- /* Do not free, someone else owns it: */
- if ((md->attribute & EFI_MEMORY_RUNTIME) ||
- !can_free_region(start, size)) {
- continue;
- }
-
- /*
- * With CONFIG_DEFERRED_STRUCT_PAGE_INIT parts of the memory
- * map are still not initialized and we can't reliably free
- * memory here.
- * Queue the ranges to free at a later point.
- */
- ranges_to_free[idx].start = start;
- ranges_to_free[idx].end = start + size;
- idx++;
}
-
- /*
- * Build a new EFI memmap that excludes any boot services
- * regions that are not tagged EFI_MEMORY_RUNTIME, since those
- * regions have now been freed.
- */
- new_md = efi.memmap.map;
- for_each_efi_memory_desc(md) {
- if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
- (md->type == EFI_BOOT_SERVICES_CODE ||
- md->type == EFI_BOOT_SERVICES_DATA) &&
- can_free_region(md->phys_addr,
- md->num_pages << EFI_PAGE_SHIFT)) {
- continue;
- }
-
- memcpy(new_md, md, efi.memmap.desc_size);
- new_md += efi.memmap.desc_size;
- }
-
- efi.memmap.num_valid_entries = (new_md - efi.memmap.map) / efi.memmap.desc_size;
}
static unsigned long __init
@@ -464,27 +402,57 @@ efi_free_unreserved_subregions(u64 range_start, u64 range_end)
static int __init efi_free_boot_services(void)
{
- struct efi_freeable_range *range = ranges_to_free;
unsigned long freed = 0;
+ efi_memory_desc_t *md;
+ void *new_md;
+
+ /* No EFI memory map or it came from the preceding kernel? */
+ if (efi_setup || !efi_enabled(EFI_MEMMAP))
+ return 0;
- if (!ranges_to_free)
+ /* Keep all regions for /sys/kernel/debug/efi */
+ if (efi_enabled(EFI_DBG))
return 0;
- while (range->start) {
+ new_md = efi.memmap.map;
+ for_each_efi_memory_desc(md) {
/*
* Don't free memory under 1M for two reasons:
* - BIOS might clobber it
* - Crash kernel needs it to be reserved
*/
- u64 start = max(range->start, SZ_1M);
+ u64 md_start = max(md->phys_addr, SZ_1M);
+ u64 md_end = md->phys_addr + md->num_pages * EFI_PAGE_SIZE;
+ bool preserve_entry = md->attribute & EFI_MEMORY_RUNTIME;
- if (start >= range->end)
+ if (md_start >= md_end)
continue;
- freed += efi_free_unreserved_subregions(start, range->end);
- range++;
+ if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+ (md->type == EFI_BOOT_SERVICES_CODE ||
+ md->type == EFI_BOOT_SERVICES_DATA)) {
+ u64 f = efi_free_unreserved_subregions(md_start, md_end);
+
+ /*
+ * Omit the memory map entry of this region only if it
+ * has been freed entirely. This ensures that boot data
+ * regions for things like ESRT and BGRT tables carry
+ * over correctly during kexec.
+ */
+ if (f < md_end - md_start)
+ preserve_entry = true;
+
+ freed += f;
+ }
+
+ if (preserve_entry) {
+ if (new_md != md)
+ memcpy(new_md, md, efi.memmap.desc_size);
+ new_md += efi.memmap.desc_size;
+ }
}
- kfree(ranges_to_free);
+
+ efi.memmap.num_valid_entries = (new_md - efi.memmap.map) / efi.memmap.desc_size;
if (freed)
pr_info("Freeing EFI boot services memory: %ldK\n", freed / SZ_1K);
--
2.53.0.851.ga537e3e6e9-goog
next prev parent reply other threads:[~2026-03-19 9:06 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-19 9:05 [PATCH v2 00/19] efi/x86: Avoid the need to mangle the EFI memory map Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 01/19] memblock: Permit existing reserved regions to be marked RSRV_KERN Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 02/19] efi: Tag memblock reservations of boot services regions as RSRV_KERN Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 03/19] x86/efi: Unmap kernel-reserved boot regions from EFI page tables Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 04/19] x86/efi: Drop EFI_MEMORY_RUNTIME check from __ioremap_check_other() Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 05/19] x86/efi: Omit RSRV_KERN memblock reservations when freeing boot regions Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 06/19] x86/efi: Defer sub-1M check from unmap to free stage Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 07/19] x86/efi: Simplify real mode trampoline allocation quirk Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 08/19] x86/efi: Omit redundant kernel image overlap check Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 09/19] x86/efi: Drop redundant EFI_PARAVIRT check Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 10/19] x86/efi: Do not rely on EFI_MEMORY_RUNTIME bit and avoid entry splitting Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 11/19] efi: Use nr_map not map_end to find the last valid memory map entry Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 12/19] x86/efi: Only merge EFI memory map entries on 32-bit systems Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 13/19] x86/efi: Clean the memory map using iterator and filter API Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 14/19] x86/efi: Update the runtime map in place Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 15/19] x86/efi: Use iterator API when mapping EFI regions for runtime Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 16/19] x86/efi: Reuse memory map instead of reallocating it Ard Biesheuvel
2026-03-19 9:05 ` Ard Biesheuvel [this message]
2026-03-19 9:05 ` [PATCH v2 18/19] x86/efi: Do not abuse RUNTIME bit to mark boot regions as reserved Ard Biesheuvel
2026-03-19 9:05 ` [PATCH v2 19/19] x86/efi: Free unused tail of the EFI memory map Ard Biesheuvel
2026-03-24 9:50 ` [PATCH v2 00/19] efi/x86: Avoid the need to mangle " Ard Biesheuvel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260319090529.1091660-38-ardb+git@google.com \
--to=ardb+git@google.com \
--cc=ardb@kernel.org \
--cc=benh@kernel.crashing.org \
--cc=linux-efi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=rppt@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox