public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Ard Biesheuvel <ardb@kernel.org>
To: linux-efi@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Ard Biesheuvel <ardb@kernel.org>,
	Evgeniy Baskov <baskov@ispras.ru>, Borislav Petkov <bp@alien8.de>,
	Andy Lutomirski <luto@kernel.org>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Ingo Molnar <mingo@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Alexey Khoroshilov <khoroshilov@ispras.ru>,
	Peter Jones <pjones@redhat.com>,
	Gerd Hoffmann <kraxel@redhat.com>, Dave Young <dyoung@redhat.com>,
	Mario Limonciello <mario.limonciello@amd.com>,
	Kees Cook <keescook@chromium.org>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: [PATCH 4/6] x86: efistub: Perform 4/5 level paging switch from the stub
Date: Mon, 24 Apr 2023 18:57:24 +0200	[thread overview]
Message-ID: <20230424165726.2245548-5-ardb@kernel.org> (raw)
In-Reply-To: <20230424165726.2245548-1-ardb@kernel.org>

In preparation for updating the EFI stub boot flow to avoid the bare
metal decompressor code altogether, implement the support code for
switching between 4 and 5 levels of paging before jumping to the kernel
proper.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 drivers/firmware/efi/libstub/efi-stub-helper.c |   4 +
 drivers/firmware/efi/libstub/x86-stub.c        | 145 ++++++++++++++++++++
 2 files changed, 149 insertions(+)

diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 1e0203d74691ffcc..fc5f3b4c45e91401 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -16,6 +16,8 @@
 
 #include "efistub.h"
 
+extern bool efi_no5lvl;
+
 bool efi_nochunk;
 bool efi_nokaslr = !IS_ENABLED(CONFIG_RANDOMIZE_BASE);
 bool efi_novamap;
@@ -73,6 +75,8 @@ efi_status_t efi_parse_options(char const *cmdline)
 			efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
 		} else if (!strcmp(param, "noinitrd")) {
 			efi_noinitrd = true;
+		} else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
+			efi_no5lvl = true;
 		} else if (!strcmp(param, "efi") && val) {
 			efi_nochunk = parse_option_str(val, "nochunk");
 			efi_novamap |= parse_option_str(val, "novamap");
diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c
index e136c94037dda8d3..7b8717cbb96a1246 100644
--- a/drivers/firmware/efi/libstub/x86-stub.c
+++ b/drivers/firmware/efi/libstub/x86-stub.c
@@ -760,6 +760,139 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
 	return EFI_SUCCESS;
 }
 
+#ifdef CONFIG_X86_64
+bool efi_no5lvl;
+
+static const struct desc_struct gdt[] = {
+	[GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+	[GDT_ENTRY_KERNEL_CS]   = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+	[GDT_ENTRY_KERNEL_DS]   = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
+};
+
+static void (*la57_toggle)(void *cr3, void *gdt);
+
+static void __naked tmpl_toggle(void *cr3, void *gdt)
+{
+	/*
+	 * This is template code that will be copied into a 32-bit addressable
+	 * buffer, allowing us to drop to 32-bit mode with paging disabled,
+	 * which is required to be able to toggle the CR4.LA57 bit.
+	 *
+	 * The first MOVB instruction is only there to capture the size of the
+	 * sequence, and implicitly, the offset to the LJMP's immediate, which
+	 * will be populated with the correct absolute address after copying.
+	 */
+	asm("0:	movb	$(4f - .), %%al		\n\t"
+	    "	lgdt	(%%rsi)			\n\t"
+	    "	movw	%[ds], %%ax		\n\t"
+	    "	movw	%%ax, %%ds		\n\t"
+	    "	movw	%%ax, %%ss		\n\t"
+	    "	leaq	2f(%%rip), %%rax	\n\t"
+	    "	pushq	%[cs32]			\n\t"
+	    "	pushq	%%rax			\n\t"
+	    "	lretq				\n\t"
+	    "1:	retq				\n\t"
+	    "	.code32				\n\t"
+	    "2: movl	%%cr0, %%eax		\n\t"
+	    "	btrl	%[pg], %%eax		\n\t"
+	    "	movl	%%eax, %%cr0		\n\t"
+	    "	jmp	3f			\n\t"
+	    "3: movl	%%cr4, %%ecx		\n\t"
+	    "	btcl	%[la57], %%ecx		\n\t"
+	    "	movl	%%ecx, %%cr4		\n\t"
+	    "	movl	%%edi, %%cr3		\n\t"
+	    "	btsl	%[pg], %%eax		\n\t"
+	    "	movl	%%eax, %%cr0		\n\t"
+	    "	ljmpl	%[cs], $(1b - 0b)	\n\t"
+	    "4:	.code64"
+	    :
+	    : [cs32]	"i"(__KERNEL32_CS),
+	      [cs]	"i"(__KERNEL_CS),
+	      [ds]	"i"(__KERNEL_DS),
+	      [pg]	"i"(X86_CR0_PG_BIT),
+	      [la57]	"i"(X86_CR4_LA57_BIT));
+}
+
+/*
+ * Enabling (or disabling) 5 level paging is tricky, because it can only be
+ * done from 32-bit mode with paging disabled. This means not only that the
+ * code itself must be running from 32-bit addressable physical memory, but
+ * also that the root page table must be 32-bit addressable, as we cannot
+ * program a 64-bit value into CR3 when running in 32-bit mode.
+ */
+static efi_status_t efi_setup_5level_paging(void)
+{
+	const u8 tmpl_size = ((u8 *)tmpl_toggle)[1];
+	efi_status_t status;
+	u8 *la57_code;
+
+	if (!efi_is_64bit())
+		return EFI_SUCCESS;
+
+	/* check for 5 level paging support */
+	if (native_cpuid_eax(0) < 7 ||
+	    !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+		return EFI_SUCCESS;
+
+	/* allocate some 32-bit addressable memory for code and a page table */
+	status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
+				    U32_MAX);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	la57_toggle = memcpy(la57_code, tmpl_toggle, tmpl_size);
+	memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size);
+
+	/*
+	 * To avoid having to allocate a 32-bit addressable stack, we use a
+	 * ljmp to switch back to long mode. However, this takes an absolute
+	 * address, so we have to poke it in at runtime. The dummy MOVB
+	 * instruction at the beginning can be used to locate the immediate.
+	 */
+	*(u32 *)&la57_code[tmpl_size - 6] += (unsigned long)la57_code;
+
+	adjust_memory_range_protection((unsigned long)la57_code, PAGE_SIZE);
+
+	return EFI_SUCCESS;
+}
+
+static void efi_5level_switch(void)
+{
+	bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
+	bool have_la57 = native_read_cr4() & X86_CR4_LA57;
+	bool need_toggle = want_la57 ^ have_la57;
+	u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
+	u64 *cr3 = (u64 *)__native_read_cr3();
+	struct desc_ptr desc;
+	u64 *new_cr3;
+
+	if (!la57_toggle || !need_toggle)
+		return;
+
+	if (!have_la57) {
+		/*
+		 * We are going to enable 5 level paging, so we need to
+		 * allocate a root level page from the 32-bit addressable
+		 * physical region, and plug the existing hierarchy into it.
+		 */
+		new_cr3 = memset(pgt, 0, PAGE_SIZE);
+		new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
+	} else {
+		// take the new root table pointer from the current entry #0
+		new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);
+
+		// copy the new root level table if it is not 32-bit addressable
+		if ((u64)new_cr3 > U32_MAX)
+			new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE);
+	}
+
+	desc.size       = sizeof(gdt) - 1;
+	desc.address    = (u64)gdt;
+
+	la57_toggle(new_cr3, &desc);
+}
+#endif
+
 /*
  * On success, we return the address of startup_32, which has potentially been
  * relocated by efi_relocate_kernel.
@@ -792,6 +925,14 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
 				(get_efi_config_table(ACPI_20_TABLE_GUID) ?:
 				 get_efi_config_table(ACPI_TABLE_GUID));
 
+#ifdef CONFIG_X86_64
+	status = efi_setup_5level_paging();
+	if (status != EFI_SUCCESS) {
+		efi_err("efi_setup_5level_paging() failed!\n");
+		goto fail;
+	}
+#endif
+
 	/*
 	 * If the kernel isn't already loaded at a suitable address,
 	 * relocate it.
@@ -910,6 +1051,10 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
 		goto fail;
 	}
 
+#ifdef CONFIG_X86_64
+	efi_5level_switch();
+#endif
+
 	return bzimage_addr;
 fail:
 	efi_err("efi_main() failed!\n");
-- 
2.39.2


  parent reply	other threads:[~2023-04-24 16:58 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-24 16:57 [PATCH 0/6] efi/x86: Avoid legacy decompressor during EFI boot Ard Biesheuvel
2023-04-24 16:57 ` [PATCH 1/6] x86: decompressor: Move global symbol references to C code Ard Biesheuvel
2023-04-24 16:57 ` [PATCH 2/6] x86: decompressor: Factor out kernel decompression and relocation Ard Biesheuvel
2023-04-24 16:57 ` [PATCH 3/6] x86: efistub: Obtain ACPI RSDP address while running in the stub Ard Biesheuvel
2023-04-24 16:57 ` Ard Biesheuvel [this message]
2023-04-26 10:42   ` [PATCH 4/6] x86: efistub: Perform 4/5 level paging switch from " Kirill A . Shutemov
2023-04-26 21:29     ` Ard Biesheuvel
2023-04-24 16:57 ` [PATCH 5/6] x86: efistub: Prefer EFI memory attributes protocol over DXE services Ard Biesheuvel
2023-04-24 16:57 ` [PATCH 6/6] x86: efistub: Avoid legacy decompressor when doing EFI boot Ard Biesheuvel
2023-04-26 10:17 ` [PATCH 0/6] efi/x86: Avoid legacy decompressor during " Borislav Petkov
2023-04-26 21:24   ` Ard Biesheuvel
2023-04-28 13:22 ` Evgeniy Baskov
2023-04-28 17:14   ` Ard Biesheuvel
2023-05-02 13:37 ` Tom Lendacky
2023-05-02 13:39   ` Ard Biesheuvel
2023-05-02 16:08     ` Tom Lendacky
2023-05-03 17:44       ` Ard Biesheuvel
2023-05-03 18:51         ` Tom Lendacky
2023-05-03 17:58       ` Tom Lendacky
2023-05-03 18:17         ` Ard Biesheuvel
2023-05-03 18:24           ` Borislav Petkov
2023-05-03 18:39             ` Ard Biesheuvel
2023-05-03 18:48           ` Tom Lendacky
2023-05-03 18:59             ` Ard Biesheuvel
2023-05-03 21:23               ` Tom Lendacky
2023-05-03 21:30                 ` Ard Biesheuvel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230424165726.2245548-5-ardb@kernel.org \
    --to=ardb@kernel.org \
    --cc=baskov@ispras.ru \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=dyoung@redhat.com \
    --cc=keescook@chromium.org \
    --cc=khoroshilov@ispras.ru \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kraxel@redhat.com \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mario.limonciello@amd.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pjones@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox