From: Ard Biesheuvel <ardb@kernel.org>
To: linux-efi@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
Evgeniy Baskov <baskov@ispras.ru>, Borislav Petkov <bp@alien8.de>,
Andy Lutomirski <luto@kernel.org>,
Dave Hansen <dave.hansen@linux.intel.com>,
Ingo Molnar <mingo@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Thomas Gleixner <tglx@linutronix.de>,
Alexey Khoroshilov <khoroshilov@ispras.ru>,
Peter Jones <pjones@redhat.com>,
Gerd Hoffmann <kraxel@redhat.com>, Dave Young <dyoung@redhat.com>,
Mario Limonciello <mario.limonciello@amd.com>,
Kees Cook <keescook@chromium.org>,
Tom Lendacky <thomas.lendacky@amd.com>,
"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH 4/6] x86: efistub: Perform 4/5 level paging switch from the stub
Date: Mon, 24 Apr 2023 18:57:24 +0200 [thread overview]
Message-ID: <20230424165726.2245548-5-ardb@kernel.org> (raw)
In-Reply-To: <20230424165726.2245548-1-ardb@kernel.org>
In preparation for updating the EFI stub boot flow to avoid the bare
metal decompressor code altogether, implement the support code for
switching between 4 and 5 levels of paging before jumping to the kernel
proper.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
drivers/firmware/efi/libstub/efi-stub-helper.c | 4 +
drivers/firmware/efi/libstub/x86-stub.c | 145 ++++++++++++++++++++
2 files changed, 149 insertions(+)
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 1e0203d74691ffcc..fc5f3b4c45e91401 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -16,6 +16,8 @@
#include "efistub.h"
+extern bool efi_no5lvl;
+
bool efi_nochunk;
bool efi_nokaslr = !IS_ENABLED(CONFIG_RANDOMIZE_BASE);
bool efi_novamap;
@@ -73,6 +75,8 @@ efi_status_t efi_parse_options(char const *cmdline)
efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
} else if (!strcmp(param, "noinitrd")) {
efi_noinitrd = true;
+ } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
+ efi_no5lvl = true;
} else if (!strcmp(param, "efi") && val) {
efi_nochunk = parse_option_str(val, "nochunk");
efi_novamap |= parse_option_str(val, "novamap");
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index e136c94037dda8d3..7b8717cbb96a1246 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -760,6 +760,139 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
return EFI_SUCCESS;
}
+#ifdef CONFIG_X86_64
+bool efi_no5lvl; /* set by efi_parse_options() when "no5lvl" is given */
+/*
+ * Descriptors made available to the mode switching code: one entry each for
+ * __KERNEL32_CS, __KERNEL_CS and __KERNEL_DS. efi_5level_switch() wraps this
+ * table in a struct desc_ptr and hands it to la57_toggle().
+ *
+ * NOTE(review): reads as (flags, base, limit), i.e. flat segments with base 0
+ * and limit 0xfffff - confirm against the GDT_ENTRY_INIT() definition.
+ */
+static const struct desc_struct gdt[] = {
+ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
+};
+/*
+ * Address of the copy of tmpl_toggle() made by efi_setup_5level_paging().
+ * It stays NULL when that function returns without allocating a buffer, which
+ * efi_5level_switch() treats as "nothing to do".
+ */
+static void (*la57_toggle)(void *cr3, void *gdt);
+
+static void __naked tmpl_toggle(void *cr3, void *gdt)
+{
+ /*
+ * This is template code that will be copied into a 32-bit addressable
+ * buffer, allowing us to drop to 32-bit mode with paging disabled,
+ * which is required to be able to toggle the CR4.LA57 bit.
+ *
+ * The first MOVB instruction is only there to capture the size of the
+ * sequence, and implicitly, the offset to the LJMP's immediate, which
+ * will be populated with the correct absolute address after copying.
+ *
+ * The sequence, by label:
+ *  0: load the GDT through the pointer in %rsi (the 'gdt' argument),
+ *     reload DS and SS with __KERNEL_DS, then far-return (LRETQ)
+ *     through __KERNEL32_CS to label 2.
+ *  2: 32-bit code from here on: clear CR0.PG to turn paging off.
+ *  3: complement CR4.LA57, load %edi (the low 32 bits of the 'cr3'
+ *     argument) into CR3, set CR0.PG again and far-jump through
+ *     __KERNEL_CS to label 1.
+ *  1: return to the caller.
+ *
+ * The LJMP target is assembled as the distance from label 0 to label 1;
+ * efi_setup_5level_paging() adds the address of the copy to it.
+ *
+ * Because LA57 is complemented rather than set or cleared, the caller
+ * decides the direction by only calling this when a change is needed.
+ *
+ * NOTE(review): nothing is saved on entry and RSP is not touched while
+ * in 32-bit mode - confirm that the upper halves of RSP and of the
+ * caller's callee-saved registers can be relied upon once back in
+ * 64-bit mode.
+ *
+ * NOTE(review): this is an asm statement with operands inside a __naked
+ * function - confirm the compilers in use support that combination.
+ */
+ asm("0: movb $(4f - .), %%al \n\t"
+ " lgdt (%%rsi) \n\t"
+ " movw %[ds], %%ax \n\t"
+ " movw %%ax, %%ds \n\t"
+ " movw %%ax, %%ss \n\t"
+ " leaq 2f(%%rip), %%rax \n\t"
+ " pushq %[cs32] \n\t"
+ " pushq %%rax \n\t"
+ " lretq \n\t"
+ "1: retq \n\t"
+ " .code32 \n\t"
+ "2: movl %%cr0, %%eax \n\t"
+ " btrl %[pg], %%eax \n\t"
+ " movl %%eax, %%cr0 \n\t"
+ " jmp 3f \n\t"
+ "3: movl %%cr4, %%ecx \n\t"
+ " btcl %[la57], %%ecx \n\t"
+ " movl %%ecx, %%cr4 \n\t"
+ " movl %%edi, %%cr3 \n\t"
+ " btsl %[pg], %%eax \n\t"
+ " movl %%eax, %%cr0 \n\t"
+ " ljmpl %[cs], $(1b - 0b) \n\t"
+ "4: .code64"
+ :
+ : [cs32] "i"(__KERNEL32_CS),
+ [cs] "i"(__KERNEL_CS),
+ [ds] "i"(__KERNEL_DS),
+ [pg] "i"(X86_CR0_PG_BIT),
+ [la57] "i"(X86_CR4_LA57_BIT));
+}
+
+/*
+ * Enabling (or disabling) 5 level paging is tricky, because it can only be
+ * done from 32-bit mode with paging disabled. This means not only that the
+ * code itself must be running from 32-bit addressable physical memory, but
+ * also that the root page table must be 32-bit addressable, as we cannot
+ * program a 64-bit value into CR3 when running in 32-bit mode.
+ *
+ * This function prepares for that: it allocates two pages of 32-bit
+ * addressable memory, copies the tmpl_toggle() code into the first one and
+ * stores its address in la57_toggle. The second page is left untouched here;
+ * efi_5level_switch() uses it as a root page table.
+ *
+ * Returns EFI_SUCCESS when the copy was set up, and also - with la57_toggle
+ * left unset - when efi_is_64bit() is false or CPUID does not report LA57.
+ * A failure status from efi_allocate_pages() is returned unchanged.
+ */
+static efi_status_t efi_setup_5level_paging(void)
+{
+ const u8 tmpl_size = ((u8 *)tmpl_toggle)[1]; /* immediate of the leading MOVB */
+ efi_status_t status;
+ u8 *la57_code;
+
+ if (!efi_is_64bit())
+ return EFI_SUCCESS;
+
+ /*
+ * check for 5 level paging support: CPUID leaf 7 must exist and have
+ * the X86_FEATURE_LA57 bit set in ECX
+ */
+ if (native_cpuid_eax(0) < 7 ||
+ !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+ return EFI_SUCCESS;
+
+ /* allocate some 32-bit addressable memory for code and a page table */
+ status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
+ U32_MAX);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ la57_toggle = memcpy(la57_code, tmpl_toggle, tmpl_size);
+ memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size); /* pad with NOPs */
+
+ /*
+ * To avoid having to allocate a 32-bit addressable stack, we use a
+ * ljmp to switch back to long mode. However, this takes an absolute
+ * address, so we have to poke it in at runtime. The dummy MOVB
+ * instruction at the beginning can be used to locate the immediate.
+ *
+ * The LJMP is the last instruction of the template and ends in a 32-bit
+ * offset followed by a 16-bit selector, so the offset starts 6 bytes
+ * before the end of the template. It was assembled relative to the
+ * start of the template, hence adding the buffer address makes it
+ * absolute.
+ */
+ *(u32 *)&la57_code[tmpl_size - 6] += (unsigned long)la57_code;
+ /*
+ * Only the first page (the code) is passed on here.
+ * NOTE(review): presumably this makes sure the page may be executed -
+ * confirm against adjust_memory_range_protection().
+ */
+ adjust_memory_range_protection((unsigned long)la57_code, PAGE_SIZE);
+
+ return EFI_SUCCESS;
+}
+/*
+ * Switch between 4 and 5 level paging if the current mode is not the wanted
+ * one. 5 levels are wanted when CONFIG_X86_5LEVEL is enabled and efi_no5lvl
+ * is not set; the current mode is read from CR4.LA57. Nothing is done when
+ * both agree, or when efi_setup_5level_paging() did not set up la57_toggle.
+ *
+ * Otherwise a 32-bit addressable root table for the new mode is prepared and
+ * passed, together with a descriptor for gdt[], to the code la57_toggle
+ * points at.
+ *
+ * NOTE(review): the raw CR3 value is used as a table pointer as is - this
+ * assumes none of its low (flag) bits are set; confirm.
+ */
+static void efi_5level_switch(void)
+{
+ bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
+ bool have_la57 = native_read_cr4() & X86_CR4_LA57;
+ bool need_toggle = want_la57 ^ have_la57;
+ u64 *pgt = (void *)la57_toggle + PAGE_SIZE; /* page following the code */
+ u64 *cr3 = (u64 *)__native_read_cr3();
+ struct desc_ptr desc;
+ u64 *new_cr3;
+
+ if (!la57_toggle || !need_toggle)
+ return;
+
+ if (!have_la57) {
+ /*
+ * We are going to enable 5 level paging, so we need to
+ * allocate a root level page from the 32-bit addressable
+ * physical region, and plug the existing hierarchy into it.
+ * The page used is the second page of the buffer obtained in
+ * efi_setup_5level_paging(); only its entry #0 is populated.
+ */
+ new_cr3 = memset(pgt, 0, PAGE_SIZE);
+ new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
+ } else {
+ // take the new root table pointer from the current entry #0
+ new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);
+
+ // copy the new root level table if it is not 32-bit addressable
+ if ((u64)new_cr3 > U32_MAX)
+ new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE);
+ }
+
+ desc.size = sizeof(gdt) - 1; /* size field holds the table size minus one */
+ desc.address = (u64)gdt;
+
+ la57_toggle(new_cr3, &desc); /* runs the copy of tmpl_toggle() */
+}
+#endif
+
/*
* On success, we return the address of startup_32, which has potentially been
* relocated by efi_relocate_kernel.
@@ -792,6 +925,14 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
(get_efi_config_table(ACPI_20_TABLE_GUID) ?:
get_efi_config_table(ACPI_TABLE_GUID));
+#ifdef CONFIG_X86_64
+ status = efi_setup_5level_paging();
+ if (status != EFI_SUCCESS) {
+ efi_err("efi_setup_5level_paging() failed!\n");
+ goto fail;
+ }
+#endif
+
/*
* If the kernel isn't already loaded at a suitable address,
* relocate it.
@@ -910,6 +1051,10 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
goto fail;
}
+#ifdef CONFIG_X86_64
+ efi_5level_switch();
+#endif
+
return bzimage_addr;
fail:
efi_err("efi_main() failed!\n");
--
2.39.2
next prev parent reply other threads:[~2023-04-24 16:58 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-24 16:57 [PATCH 0/6] efi/x86: Avoid legacy decompressor during EFI boot Ard Biesheuvel
2023-04-24 16:57 ` [PATCH 1/6] x86: decompressor: Move global symbol references to C code Ard Biesheuvel
2023-04-24 16:57 ` [PATCH 2/6] x86: decompressor: Factor out kernel decompression and relocation Ard Biesheuvel
2023-04-24 16:57 ` [PATCH 3/6] x86: efistub: Obtain ACPI RSDP address while running in the stub Ard Biesheuvel
2023-04-24 16:57 ` Ard Biesheuvel [this message]
2023-04-26 10:42 ` [PATCH 4/6] x86: efistub: Perform 4/5 level paging switch from " Kirill A . Shutemov
2023-04-26 21:29 ` Ard Biesheuvel
2023-04-24 16:57 ` [PATCH 5/6] x86: efistub: Prefer EFI memory attributes protocol over DXE services Ard Biesheuvel
2023-04-24 16:57 ` [PATCH 6/6] x86: efistub: Avoid legacy decompressor when doing EFI boot Ard Biesheuvel
2023-04-26 10:17 ` [PATCH 0/6] efi/x86: Avoid legacy decompressor during " Borislav Petkov
2023-04-26 21:24 ` Ard Biesheuvel
2023-04-28 13:22 ` Evgeniy Baskov
2023-04-28 17:14 ` Ard Biesheuvel
2023-05-02 13:37 ` Tom Lendacky
2023-05-02 13:39 ` Ard Biesheuvel
2023-05-02 16:08 ` Tom Lendacky
2023-05-03 17:44 ` Ard Biesheuvel
2023-05-03 18:51 ` Tom Lendacky
2023-05-03 17:58 ` Tom Lendacky
2023-05-03 18:17 ` Ard Biesheuvel
2023-05-03 18:24 ` Borislav Petkov
2023-05-03 18:39 ` Ard Biesheuvel
2023-05-03 18:48 ` Tom Lendacky
2023-05-03 18:59 ` Ard Biesheuvel
2023-05-03 21:23 ` Tom Lendacky
2023-05-03 21:30 ` Ard Biesheuvel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230424165726.2245548-5-ardb@kernel.org \
--to=ardb@kernel.org \
--cc=baskov@ispras.ru \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=dyoung@redhat.com \
--cc=keescook@chromium.org \
--cc=khoroshilov@ispras.ru \
--cc=kirill.shutemov@linux.intel.com \
--cc=kraxel@redhat.com \
--cc=linux-efi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mario.limonciello@amd.com \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=pjones@redhat.com \
--cc=tglx@linutronix.de \
--cc=thomas.lendacky@amd.com \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox