From: Ard Biesheuvel <ardb+git@google.com>
To: linux-kernel@vger.kernel.org
Cc: linux-efi@vger.kernel.org, x86@kernel.org,
Ard Biesheuvel <ardb@kernel.org>, Borislav Petkov <bp@alien8.de>,
Ingo Molnar <mingo@kernel.org>,
Dionna Amalie Glaze <dionnaglaze@google.com>,
Kevin Loughlin <kevinloughlin@google.com>,
Tom Lendacky <thomas.lendacky@amd.com>
Subject: [RFT PATCH v2 03/23] x86/boot: Drop global variables keeping track of LA57 state
Date: Sun, 4 May 2025 11:52:33 +0200 [thread overview]
Message-ID: <20250504095230.2932860-28-ardb+git@google.com> (raw)
In-Reply-To: <20250504095230.2932860-25-ardb+git@google.com>
From: Ard Biesheuvel <ardb@kernel.org>
On x86_64, the core kernel is entered in long mode, which implies that
paging is enabled. This means that the CR4.LA57 control bit is
guaranteed to be in sync with the number of paging levels used by the
kernel, and there is no need to store this in a variable.
There is also no need to use variables for storing the calculations of
pgdir_shift and ptrs_per_p4d, as they are easily determined on the fly.
This removes the need for two different sources of truth (i.e., early
and late) for determining whether 5-level paging is in use: CR4.LA57
always reflects the actual state, and never changes from the point of
view of the 64-bit core kernel. It also removes the need for exposing
the associated variables to the startup code. The only potential concern
is the cost of CR4 accesses, which can be mitigated using alternatives
patching based on feature detection.
Note that even the decompressor does not manipulate any page tables
before updating CR4.LA57, so it can also avoid the associated global
variables entirely. However, as it does not implement alternatives
patching, the associated ELF sections need to be discarded.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/x86/boot/compressed/misc.h | 4 --
arch/x86/boot/compressed/pgtable_64.c | 12 ------
arch/x86/boot/compressed/vmlinux.lds.S | 1 +
arch/x86/boot/startup/map_kernel.c | 12 +-----
arch/x86/boot/startup/sme.c | 9 ----
arch/x86/include/asm/pgtable_64_types.h | 43 ++++++++++----------
arch/x86/kernel/cpu/common.c | 2 -
arch/x86/kernel/head64.c | 11 -----
arch/x86/mm/kasan_init_64.c | 3 --
9 files changed, 24 insertions(+), 73 deletions(-)
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index dd8d1a85f671..450d27d0f449 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -16,9 +16,6 @@
#define __NO_FORTIFY
-/* cpu_feature_enabled() cannot be used this early */
-#define USE_EARLY_PGTABLE_L5
-
/*
* Boot stub deals with identity mappings, physical and virtual addresses are
* the same, so override these defines.
@@ -181,7 +178,6 @@ static inline int count_immovable_mem_regions(void) { return 0; }
#endif
/* ident_map_64.c */
-extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
extern void kernel_add_identity_map(unsigned long start, unsigned long end);
/* Used by PAGE_KERN* macros: */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 5a6c7a190e5b..591d28f2feb6 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -10,13 +10,6 @@
#define BIOS_START_MIN 0x20000U /* 128K, less than this is insane */
#define BIOS_START_MAX 0x9f000U /* 640K, absolute maximum */
-#ifdef CONFIG_X86_5LEVEL
-/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
-unsigned int __section(".data") __pgtable_l5_enabled;
-unsigned int __section(".data") pgdir_shift = 39;
-unsigned int __section(".data") ptrs_per_p4d = 1;
-#endif
-
/* Buffer to preserve trampoline memory */
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
@@ -127,11 +120,6 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
l5_required = true;
-
- /* Initialize variables for 5-level paging */
- __pgtable_l5_enabled = 1;
- pgdir_shift = 48;
- ptrs_per_p4d = 512;
}
/*
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index 3b2bc61c9408..d3e786ff7dcb 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -81,6 +81,7 @@ SECTIONS
*(.dynamic) *(.dynsym) *(.dynstr) *(.dynbss)
*(.hash) *(.gnu.hash)
*(.note.*)
+ *(.altinstructions .altinstr_replacement)
}
.got.plt (INFO) : {
diff --git a/arch/x86/boot/startup/map_kernel.c b/arch/x86/boot/startup/map_kernel.c
index 099ae2559336..11f99d907f76 100644
--- a/arch/x86/boot/startup/map_kernel.c
+++ b/arch/x86/boot/startup/map_kernel.c
@@ -16,19 +16,9 @@ extern unsigned int next_early_pgt;
static inline bool check_la57_support(void)
{
- if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ if (!pgtable_l5_enabled())
return false;
- /*
- * 5-level paging is detected and enabled at kernel decompression
- * stage. Only check if it has been enabled there.
- */
- if (!(native_read_cr4() & X86_CR4_LA57))
- return false;
-
- __pgtable_l5_enabled = 1;
- pgdir_shift = 48;
- ptrs_per_p4d = 512;
page_offset_base = __PAGE_OFFSET_BASE_L5;
vmalloc_base = __VMALLOC_BASE_L5;
vmemmap_base = __VMEMMAP_BASE_L5;
diff --git a/arch/x86/boot/startup/sme.c b/arch/x86/boot/startup/sme.c
index 5738b31c8e60..ade5ad5060e9 100644
--- a/arch/x86/boot/startup/sme.c
+++ b/arch/x86/boot/startup/sme.c
@@ -25,15 +25,6 @@
#undef CONFIG_PARAVIRT_XXL
#undef CONFIG_PARAVIRT_SPINLOCKS
-/*
- * This code runs before CPU feature bits are set. By default, the
- * pgtable_l5_enabled() function uses bit X86_FEATURE_LA57 to determine if
- * 5-level paging is active, so that won't work here. USE_EARLY_PGTABLE_L5
- * is provided to handle this situation and, instead, use a variable that
- * has been set by the early boot code.
- */
-#define USE_EARLY_PGTABLE_L5
-
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/mem_encrypt.h>
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 5bb782d856f2..40dceb7d80f5 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -6,7 +6,10 @@
#ifndef __ASSEMBLER__
#include <linux/types.h>
+#include <asm/alternative.h>
+#include <asm/cpufeatures.h>
#include <asm/kaslr.h>
+#include <asm/processor-flags.h>
/*
* These are used to make use of C type-checking..
@@ -21,28 +24,24 @@ typedef unsigned long pgprotval_t;
typedef struct { pteval_t pte; } pte_t;
typedef struct { pmdval_t pmd; } pmd_t;
-extern unsigned int __pgtable_l5_enabled;
-
-#ifdef CONFIG_X86_5LEVEL
-#ifdef USE_EARLY_PGTABLE_L5
-/*
- * cpu_feature_enabled() is not available in early boot code.
- * Use variable instead.
- */
-static inline bool pgtable_l5_enabled(void)
+static __always_inline __pure bool pgtable_l5_enabled(void)
{
- return __pgtable_l5_enabled;
-}
-#else
-#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
-#endif /* USE_EARLY_PGTABLE_L5 */
+ unsigned long r;
+ bool ret;
-#else
-#define pgtable_l5_enabled() 0
-#endif /* CONFIG_X86_5LEVEL */
+ if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+ return false;
-extern unsigned int pgdir_shift;
-extern unsigned int ptrs_per_p4d;
+ asm(ALTERNATIVE_TERNARY(
+ "movq %%cr4, %[reg] \n\t btl %[la57], %k[reg]" CC_SET(c),
+ %P[feat], "stc", "clc")
+ : [reg] "=&r" (r), CC_OUT(c) (ret)
+ : [feat] "i" (X86_FEATURE_LA57),
+ [la57] "i" (X86_CR4_LA57_BIT)
+ : "cc");
+
+ return ret;
+}
#endif /* !__ASSEMBLER__ */
@@ -53,7 +52,7 @@ extern unsigned int ptrs_per_p4d;
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT pgdir_shift
+#define PGDIR_SHIFT (pgtable_l5_enabled() ? 48 : 39)
#define PTRS_PER_PGD 512
/*
@@ -61,7 +60,7 @@ extern unsigned int ptrs_per_p4d;
*/
#define P4D_SHIFT 39
#define MAX_PTRS_PER_P4D 512
-#define PTRS_PER_P4D ptrs_per_p4d
+#define PTRS_PER_P4D (pgtable_l5_enabled() ? 512 : 1)
#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE - 1))
@@ -76,6 +75,8 @@ extern unsigned int ptrs_per_p4d;
#define PTRS_PER_PGD 512
#define MAX_PTRS_PER_P4D 1
+#define MAX_POSSIBLE_PHYSMEM_BITS 46
+
#endif /* CONFIG_X86_5LEVEL */
/*
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 12126adbc3a9..eb6a7f6e20c4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,6 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* cpu_feature_enabled() cannot be used this early */
-#define USE_EARLY_PGTABLE_L5
#include <linux/memblock.h>
#include <linux/linkage.h>
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 29226f3ac064..3d49abb1bb3a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -5,9 +5,6 @@
* Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
*/
-/* cpu_feature_enabled() cannot be used this early */
-#define USE_EARLY_PGTABLE_L5
-
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
@@ -50,14 +47,6 @@ extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
-#ifdef CONFIG_X86_5LEVEL
-unsigned int __pgtable_l5_enabled __ro_after_init;
-unsigned int pgdir_shift __ro_after_init = 39;
-EXPORT_SYMBOL(pgdir_shift);
-unsigned int ptrs_per_p4d __ro_after_init = 1;
-EXPORT_SYMBOL(ptrs_per_p4d);
-#endif
-
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
unsigned long page_offset_base __ro_after_init = __PAGE_OFFSET_BASE_L4;
EXPORT_SYMBOL(page_offset_base);
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0539efd0d216..7c4fafbd52cc 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -1,9 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) "kasan: " fmt
-/* cpu_feature_enabled() cannot be used this early */
-#define USE_EARLY_PGTABLE_L5
-
#include <linux/memblock.h>
#include <linux/kasan.h>
#include <linux/kdebug.h>
--
2.49.0.906.g1f30a19c02-goog
next prev parent reply other threads:[~2025-05-04 9:53 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-04 9:52 [RFT PATCH v2 00/23] x86: strict separation of startup code Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 01/23] x86/boot: Move early_setup_gdt() back into head64.c Ard Biesheuvel
2025-05-04 14:20 ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 02/23] x86/boot: Disregard __supported_pte_mask in __startup_64() Ard Biesheuvel
2025-05-04 14:20 ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04 9:52 ` Ard Biesheuvel [this message]
2025-05-04 13:50 ` [RFT PATCH v2 03/23] x86/boot: Drop global variables keeping track of LA57 state Ingo Molnar
2025-05-04 14:46 ` Ard Biesheuvel
2025-05-04 14:58 ` Linus Torvalds
2025-05-04 19:33 ` Ard Biesheuvel
2025-05-05 21:07 ` Ingo Molnar
2025-05-05 21:24 ` Ingo Molnar
2025-05-05 22:30 ` Ard Biesheuvel
2025-05-05 21:26 ` Linus Torvalds
2025-05-05 21:51 ` Ingo Molnar
2025-05-04 9:52 ` [RFT PATCH v2 04/23] x86/sev: Make sev_snp_enabled() a static function Ard Biesheuvel
2025-05-04 14:20 ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 05/23] x86/sev: Move instruction decoder into separate source file Ard Biesheuvel
2025-05-04 14:20 ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-05 14:48 ` [RFT PATCH v2 05/23] " Tom Lendacky
2025-05-05 14:50 ` Ard Biesheuvel
2025-05-07 9:58 ` Borislav Petkov
2025-05-07 11:49 ` Ard Biesheuvel
2025-05-08 11:08 ` Borislav Petkov
2025-05-04 9:52 ` [RFT PATCH v2 06/23] x86/sev: Disentangle #VC handling code from startup code Ard Biesheuvel
2025-05-05 5:31 ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-05 14:58 ` [RFT PATCH v2 06/23] " Tom Lendacky
2025-05-05 16:54 ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 07/23] x86/sev: Separate MSR and GHCB based snp_cpuid() via a callback Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 08/23] x86/sev: Fall back to early page state change code only during boot Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 09/23] x86/sev: Move GHCB page based HV communication out of startup code Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 10/23] x86/sev: Use boot SVSM CA for all startup and init code Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 11/23] x86/boot: Drop redundant RMPADJUST in SEV SVSM presence check Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 12/23] x86/sev: Unify SEV-SNP hypervisor feature check Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 13/23] x86/linkage: Add SYM_PIC_ALIAS() macro helper to emit symbol aliases Ard Biesheuvel
2025-05-04 14:20 ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 14/23] x86/boot: Add a bunch of PIC aliases Ard Biesheuvel
2025-05-04 14:20 ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 15/23] x86/boot: Provide __pti_set_user_pgtbl() to startup code Ard Biesheuvel
2025-05-04 14:20 ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-05 16:03 ` Borislav Petkov
2025-05-05 16:19 ` Ard Biesheuvel
2025-05-05 16:47 ` Borislav Petkov
2025-05-06 7:18 ` Borislav Petkov
2025-05-05 16:54 ` tip-bot2 for Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 16/23] x86/sev: Provide PIC aliases for SEV related data objects Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 17/23] x86/sev: Move __sev_[get|put]_ghcb() into separate noinstr object Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 18/23] x86/sev: Export startup routines for ordinary use Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 19/23] x86/boot: Created a confined code area for startup code Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 20/23] x86/boot: Move startup code out of __head section Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 21/23] x86/boot: Disallow absolute symbol references in startup code Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 22/23] x86/boot: Revert "Reject absolute references in .head.text" Ard Biesheuvel
2025-05-04 9:52 ` [RFT PATCH v2 23/23] x86/boot: Get rid of the .head.text section Ard Biesheuvel
2025-05-04 14:04 ` [RFT PATCH v2 00/23] x86: strict separation of startup code Ingo Molnar
2025-05-04 14:55 ` Ard Biesheuvel
2025-05-05 5:08 ` Ingo Molnar
2025-05-07 9:52 ` Borislav Petkov
2025-05-07 12:05 ` Ard Biesheuvel
2025-05-08 10:55 ` Borislav Petkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250504095230.2932860-28-ardb+git@google.com \
--to=ardb+git@google.com \
--cc=ardb@kernel.org \
--cc=bp@alien8.de \
--cc=dionnaglaze@google.com \
--cc=kevinloughlin@google.com \
--cc=linux-efi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=thomas.lendacky@amd.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox