public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@kernel.org>
To: Ard Biesheuvel <ardb+git@google.com>
Cc: linux-kernel@vger.kernel.org, linux-efi@vger.kernel.org,
	x86@kernel.org, Ard Biesheuvel <ardb@kernel.org>,
	Borislav Petkov <bp@alien8.de>,
	Dionna Amalie Glaze <dionnaglaze@google.com>,
	Kevin Loughlin <kevinloughlin@google.com>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: Re: [RFT PATCH v2 03/23] x86/boot: Drop global variables keeping track of LA57 state
Date: Sun, 4 May 2025 15:50:57 +0200	[thread overview]
Message-ID: <aBdwwR52hI37bW9a@gmail.com> (raw)
In-Reply-To: <20250504095230.2932860-28-ardb+git@google.com>


* Ard Biesheuvel <ardb+git@google.com> wrote:

> From: Ard Biesheuvel <ardb@kernel.org>
> 
> On x86_64, the core kernel is entered in long mode, which implies that
> paging is enabled. This means that the CR4.LA57 control bit is
> guaranteed to be in sync with the number of paging levels used by the
> kernel, and there is no need to store this in a variable.
> 
> There is also no need to use variables for storing the calculations of
> pgdir_shift and ptrs_per_p4d, as they are easily determined on the fly.
> 
> This removes the need for two different sources of truth (i.e., early
> and late) for determining whether 5-level paging is in use: CR4.LA57
> always reflects the actual state, and never changes from the point of
> view of the 64-bit core kernel. It also removes the need for exposing
> the associated variables to the startup code. The only potential concern
> is the cost of CR4 accesses, which can be mitigated using alternatives
> patching based on feature detection.
> 
> Note that even the decompressor does not manipulate any page tables
> before updating CR4.LA57, so it can also avoid the associated global
> variables entirely. However, as it does not implement alternatives
> patching, the associated ELF sections need to be discarded.
> 
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>  arch/x86/boot/compressed/misc.h         |  4 --
>  arch/x86/boot/compressed/pgtable_64.c   | 12 ------
>  arch/x86/boot/compressed/vmlinux.lds.S  |  1 +
>  arch/x86/boot/startup/map_kernel.c      | 12 +-----
>  arch/x86/boot/startup/sme.c             |  9 ----
>  arch/x86/include/asm/pgtable_64_types.h | 43 ++++++++++----------
>  arch/x86/kernel/cpu/common.c            |  2 -
>  arch/x86/kernel/head64.c                | 11 -----
>  arch/x86/mm/kasan_init_64.c             |  3 --
>  9 files changed, 24 insertions(+), 73 deletions(-)

So this patch breaks the build & creates header dependency hell on 
x86-64 allnoconfig:

 starship:~/tip> m kernel/pid.o 
   DESCEND objtool
   CC      arch/x86/kernel/asm-offsets.s
   INSTALL libsubcmd_headers
 In file included from ./arch/x86/include/asm/pgtable_64_types.h:5,
                 from ./arch/x86/include/asm/pgtable_types.h:283,
                 from ./arch/x86/include/asm/processor.h:21,
                 from ./arch/x86/include/asm/cpufeature.h:5,
                 from ./arch/x86/include/asm/thread_info.h:59,
                 from ./include/linux/thread_info.h:60,
                 from ./include/linux/spinlock.h:60,
                 from ./include/linux/swait.h:7,
                 from ./include/linux/completion.h:12,
                 from ./include/linux/crypto.h:15,
                 from arch/x86/kernel/asm-offsets.c:9:
 ./arch/x86/include/asm/sparsemem.h:29:34: warning: "pgtable_l5_enabled" is not defined, evaluates to 0 [-Wundef]
    29 | # define MAX_PHYSMEM_BITS       (pgtable_l5_enabled() ? 52 : 46)
       |                                  ^~~~~~~~~~~~~~~~~~
 ./include/linux/page-flags-layout.h:31:26: note: in expansion of macro ‘MAX_PHYSMEM_BITS’
    31 | #define SECTIONS_SHIFT  (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS)

Plus I'm not sure I'm happy about this kind of complexity getting 
embedded deep within low level MM primitives:

  static __always_inline __pure bool pgtable_l5_enabled(void)
  {
	unsigned long r;
	bool ret;

	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
		return false;

	asm(ALTERNATIVE_TERNARY(
		 "movq %%cr4, %[reg] \n\t btl %[la57], %k[reg]" CC_SET(c),
		 %P[feat], "stc", "clc")
		 : [reg] "=&r" (r), CC_OUT(c) (ret)
		 : [feat] "i"  (X86_FEATURE_LA57),
		   [la57] "i"  (X86_CR4_LA57_BIT)
		 : "cc");

	return ret;
  }

it's basically everywhere:

	arch/x86/include/asm/page_64_types.h:#define __VIRTUAL_MASK_SHIFT	(pgtable_l5_enabled() ? 56 : 47)
	arch/x86/include/asm/paravirt.h:	if (pgtable_l5_enabled())						\
	arch/x86/include/asm/paravirt.h:	if (pgtable_l5_enabled())					\
	arch/x86/include/asm/pgalloc.h:	if (!pgtable_l5_enabled())
	arch/x86/include/asm/pgalloc.h:	if (!pgtable_l5_enabled())
	arch/x86/include/asm/pgalloc.h:	if (pgtable_l5_enabled())
	arch/x86/include/asm/pgtable.h:#define pgd_clear(pgd)			(pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0)
	arch/x86/include/asm/pgtable.h:	if (!pgtable_l5_enabled())
	arch/x86/include/asm/pgtable.h:	if (!pgtable_l5_enabled())
	arch/x86/include/asm/pgtable.h:	if (!pgtable_l5_enabled())
	arch/x86/include/asm/pgtable.h:	if (!pgtable_l5_enabled())
	arch/x86/include/asm/pgtable_32_types.h:#define pgtable_l5_enabled() 0
	arch/x86/include/asm/pgtable_64.h:	return !pgtable_l5_enabled();
	arch/x86/include/asm/pgtable_64.h:	if (pgtable_l5_enabled() ||
	arch/x86/include/asm/pgtable_64_types.h:static __always_inline __pure bool pgtable_l5_enabled(void)
	arch/x86/include/asm/pgtable_64_types.h:#define PGDIR_SHIFT	(pgtable_l5_enabled() ? 48 : 39)
	arch/x86/include/asm/pgtable_64_types.h:#define PTRS_PER_P4D		(pgtable_l5_enabled() ? 512 : 1)
	arch/x86/include/asm/pgtable_64_types.h:# define VMALLOC_SIZE_TB	(pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
	arch/x86/include/asm/sparsemem.h:# define MAX_PHYSMEM_BITS	(pgtable_l5_enabled() ? 52 : 46)

Inlined approximately a gazillion times. (449 times on x86 defconfig. 
Yes, I just counted it.)

And it's not even worth it, as it generates horrendous code:

   154:   0f 20 e0                mov    %cr4,%rax
   157:   0f ba e0 0c             bt     $0xc,%eax

... while CR4 access might be faster these days, it's certainly not as 
fast as simple percpu access. Plus it clobbers a register (RAX in the 
example above), which is unnecessary for a flag test.

Cannot pgtable_l5_enabled() be a single, simple percpu flag or so?

And yes, this creates another layer for these values - but thus 
decouples low level MM from detection & implementation complexities, 
which is a plus ...

Thanks,

	Ingo

  reply	other threads:[~2025-05-04 13:51 UTC|newest]

Thread overview: 59+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-04  9:52 [RFT PATCH v2 00/23] x86: strict separation of startup code Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 01/23] x86/boot: Move early_setup_gdt() back into head64.c Ard Biesheuvel
2025-05-04 14:20   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 02/23] x86/boot: Disregard __supported_pte_mask in __startup_64() Ard Biesheuvel
2025-05-04 14:20   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 03/23] x86/boot: Drop global variables keeping track of LA57 state Ard Biesheuvel
2025-05-04 13:50   ` Ingo Molnar [this message]
2025-05-04 14:46     ` Ard Biesheuvel
2025-05-04 14:58     ` Linus Torvalds
2025-05-04 19:33       ` Ard Biesheuvel
2025-05-05 21:07       ` Ingo Molnar
2025-05-05 21:24         ` Ingo Molnar
2025-05-05 22:30           ` Ard Biesheuvel
2025-05-05 21:26         ` Linus Torvalds
2025-05-05 21:51           ` Ingo Molnar
2025-05-04  9:52 ` [RFT PATCH v2 04/23] x86/sev: Make sev_snp_enabled() a static function Ard Biesheuvel
2025-05-04 14:20   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 05/23] x86/sev: Move instruction decoder into separate source file Ard Biesheuvel
2025-05-04 14:20   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-05 14:48   ` [RFT PATCH v2 05/23] " Tom Lendacky
2025-05-05 14:50     ` Ard Biesheuvel
2025-05-07  9:58   ` Borislav Petkov
2025-05-07 11:49     ` Ard Biesheuvel
2025-05-08 11:08       ` Borislav Petkov
2025-05-04  9:52 ` [RFT PATCH v2 06/23] x86/sev: Disentangle #VC handling code from startup code Ard Biesheuvel
2025-05-05  5:31   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-05 14:58   ` [RFT PATCH v2 06/23] " Tom Lendacky
2025-05-05 16:54   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 07/23] x86/sev: Separate MSR and GHCB based snp_cpuid() via a callback Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 08/23] x86/sev: Fall back to early page state change code only during boot Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 09/23] x86/sev: Move GHCB page based HV communication out of startup code Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 10/23] x86/sev: Use boot SVSM CA for all startup and init code Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 11/23] x86/boot: Drop redundant RMPADJUST in SEV SVSM presence check Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 12/23] x86/sev: Unify SEV-SNP hypervisor feature check Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 13/23] x86/linkage: Add SYM_PIC_ALIAS() macro helper to emit symbol aliases Ard Biesheuvel
2025-05-04 14:20   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 14/23] x86/boot: Add a bunch of PIC aliases Ard Biesheuvel
2025-05-04 14:20   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 15/23] x86/boot: Provide __pti_set_user_pgtbl() to startup code Ard Biesheuvel
2025-05-04 14:20   ` [tip: x86/boot] " tip-bot2 for Ard Biesheuvel
2025-05-05 16:03     ` Borislav Petkov
2025-05-05 16:19       ` Ard Biesheuvel
2025-05-05 16:47         ` Borislav Petkov
2025-05-06  7:18           ` Borislav Petkov
2025-05-05 16:54   ` tip-bot2 for Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 16/23] x86/sev: Provide PIC aliases for SEV related data objects Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 17/23] x86/sev: Move __sev_[get|put]_ghcb() into separate noinstr object Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 18/23] x86/sev: Export startup routines for ordinary use Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 19/23] x86/boot: Created a confined code area for startup code Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 20/23] x86/boot: Move startup code out of __head section Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 21/23] x86/boot: Disallow absolute symbol references in startup code Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 22/23] x86/boot: Revert "Reject absolute references in .head.text" Ard Biesheuvel
2025-05-04  9:52 ` [RFT PATCH v2 23/23] x86/boot: Get rid of the .head.text section Ard Biesheuvel
2025-05-04 14:04 ` [RFT PATCH v2 00/23] x86: strict separation of startup code Ingo Molnar
2025-05-04 14:55   ` Ard Biesheuvel
2025-05-05  5:08     ` Ingo Molnar
2025-05-07  9:52 ` Borislav Petkov
2025-05-07 12:05   ` Ard Biesheuvel
2025-05-08 10:55     ` Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aBdwwR52hI37bW9a@gmail.com \
    --to=mingo@kernel.org \
    --cc=ardb+git@google.com \
    --cc=ardb@kernel.org \
    --cc=bp@alien8.de \
    --cc=dionnaglaze@google.com \
    --cc=kevinloughlin@google.com \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=thomas.lendacky@amd.com \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox