* [PATCH v1 1/4] x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param()
2024-07-03 8:54 [PATCH v1 0/4] Enable FRED earlier Xin Li (Intel)
@ 2024-07-03 8:54 ` Xin Li (Intel)
2024-07-04 11:20 ` Nikolay Borisov
2024-07-03 8:54 ` [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns() Xin Li (Intel)
` (2 subsequent siblings)
3 siblings, 1 reply; 27+ messages in thread
From: Xin Li (Intel) @ 2024-07-03 8:54 UTC (permalink / raw)
To: linux-kernel
Cc: hpa, tglx, mingo, bp, dave.hansen, x86, peterz, andrew.cooper3,
nik.borisov, houwenlong.hwl
Depending on whether FRED will be used, sysvec_install() installs
a system interrupt handler into FRED system vector dispatch table
or IDT. However FRED can be disabled later in trap_init(), after
sysvec_install() is called. E.g., the HYPERVISOR_CALLBACK_VECTOR
handler is registered with sysvec_install() in kvm_guest_init(),
which is called in setup_arch() but way before trap_init(). IOW,
there is a gap between FRED is available and available but disabled.
As a result, when FRED is available but disabled, its IDT handler
is not installed thus spurious_interrupt() will be invoked.
Fix it by parsing cmdline param "fred=" in cpu_parse_early_param()
to minimize the gap between FRED is available and available but
disabled.
Fixes: 3810da12710a ("x86/fred: Add a fred= cmdline param")
Reported-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
arch/x86/kernel/cpu/common.c | 5 +++++
arch/x86/kernel/traps.c | 26 --------------------------
2 files changed, 5 insertions(+), 26 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d4e539d4e158..9a904fe7c829 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1510,6 +1510,11 @@ static void __init cpu_parse_early_param(void)
if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
+ /* Minimize the gap between FRED is available and available but disabled. */
+ arglen = cmdline_find_option(boot_command_line, "fred", arg, sizeof(arg));
+ if (arglen != strlen("on") || strcmp(arg, "on"))
+ setup_clear_cpu_cap(X86_FEATURE_FRED);
+
arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
if (arglen <= 0)
return;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4fa0b17e5043..6afb41e6cbbb 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1402,34 +1402,8 @@ DEFINE_IDTENTRY_SW(iret_error)
}
#endif
-/* Do not enable FRED by default yet. */
-static bool enable_fred __ro_after_init = false;
-
-#ifdef CONFIG_X86_FRED
-static int __init fred_setup(char *str)
-{
- if (!str)
- return -EINVAL;
-
- if (!cpu_feature_enabled(X86_FEATURE_FRED))
- return 0;
-
- if (!strcmp(str, "on"))
- enable_fred = true;
- else if (!strcmp(str, "off"))
- enable_fred = false;
- else
- pr_warn("invalid FRED option: 'fred=%s'\n", str);
- return 0;
-}
-early_param("fred", fred_setup);
-#endif
-
void __init trap_init(void)
{
- if (cpu_feature_enabled(X86_FEATURE_FRED) && !enable_fred)
- setup_clear_cpu_cap(X86_FEATURE_FRED);
-
/* Init cpu_entry_area before IST entries are set up */
setup_cpu_entry_areas();
--
2.45.2
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [PATCH v1 1/4] x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param()
2024-07-03 8:54 ` [PATCH v1 1/4] x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param() Xin Li (Intel)
@ 2024-07-04 11:20 ` Nikolay Borisov
2024-07-07 17:42 ` Xin Li
0 siblings, 1 reply; 27+ messages in thread
From: Nikolay Borisov @ 2024-07-04 11:20 UTC (permalink / raw)
To: Xin Li (Intel), linux-kernel
Cc: hpa, tglx, mingo, bp, dave.hansen, x86, peterz, andrew.cooper3,
houwenlong.hwl
On 3.07.24 г. 11:54 ч., Xin Li (Intel) wrote:
> Depending on whether FRED will be used, sysvec_install() installs
> a system interrupt handler into FRED system vector dispatch table
> or IDT. However FRED can be disabled later in trap_init(), after
> sysvec_install() is called. E.g., the HYPERVISOR_CALLBACK_VECTOR
> handler is registered with sysvec_install() in kvm_guest_init(),
> which is called in setup_arch() but way before trap_init(). IOW,
> there is a gap between FRED is available and available but disabled.
> As a result, when FRED is available but disabled, its IDT handler
> is not installed thus spurious_interrupt() will be invoked.
>
> Fix it by parsing cmdline param "fred=" in cpu_parse_early_param()
> to minimize the gap between FRED is available and available but
> disabled.
>
> Fixes: 3810da12710a ("x86/fred: Add a fred= cmdline param")
> Reported-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> Suggested-by: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Xin Li (Intel) <xin@zytor.com>
> ---
> arch/x86/kernel/cpu/common.c | 5 +++++
> arch/x86/kernel/traps.c | 26 --------------------------
> 2 files changed, 5 insertions(+), 26 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index d4e539d4e158..9a904fe7c829 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1510,6 +1510,11 @@ static void __init cpu_parse_early_param(void)
> if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
> setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
>
> + /* Minimize the gap between FRED is available and available but disabled. */
> + arglen = cmdline_find_option(boot_command_line, "fred", arg, sizeof(arg));
> + if (arglen != strlen("on") || strcmp(arg, "on"))
Just make this check :
ret = cmdline_find_option(...)
if (ret < 0 || strcmp()))
A lot more straightforward
> + setup_clear_cpu_cap(X86_FEATURE_FRED);
> +
> arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
> if (arglen <= 0)
> return;
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 4fa0b17e5043..6afb41e6cbbb 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -1402,34 +1402,8 @@ DEFINE_IDTENTRY_SW(iret_error)
> }
> #endif
>
> -/* Do not enable FRED by default yet. */
> -static bool enable_fred __ro_after_init = false;
> -
> -#ifdef CONFIG_X86_FRED
> -static int __init fred_setup(char *str)
> -{
> - if (!str)
> - return -EINVAL;
> -
> - if (!cpu_feature_enabled(X86_FEATURE_FRED))
> - return 0;
> -
> - if (!strcmp(str, "on"))
> - enable_fred = true;
> - else if (!strcmp(str, "off"))
> - enable_fred = false;
> - else
> - pr_warn("invalid FRED option: 'fred=%s'\n", str);
> - return 0;
> -}
> -early_param("fred", fred_setup);
> -#endif
> -
> void __init trap_init(void)
> {
> - if (cpu_feature_enabled(X86_FEATURE_FRED) && !enable_fred)
> - setup_clear_cpu_cap(X86_FEATURE_FRED);
> -
> /* Init cpu_entry_area before IST entries are set up */
> setup_cpu_entry_areas();
>
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 1/4] x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param()
2024-07-04 11:20 ` Nikolay Borisov
@ 2024-07-07 17:42 ` Xin Li
0 siblings, 0 replies; 27+ messages in thread
From: Xin Li @ 2024-07-07 17:42 UTC (permalink / raw)
To: Nikolay Borisov, linux-kernel
Cc: hpa, tglx, mingo, bp, dave.hansen, x86, peterz, andrew.cooper3,
houwenlong.hwl
On 7/4/2024 4:20 AM, Nikolay Borisov wrote:
>
>
> On 3.07.24 г. 11:54 ч., Xin Li (Intel) wrote:
>> Depending on whether FRED will be used, sysvec_install() installs
>> a system interrupt handler into FRED system vector dispatch table
>> or IDT. However FRED can be disabled later in trap_init(), after
>> sysvec_install() is called. E.g., the HYPERVISOR_CALLBACK_VECTOR
>> handler is registered with sysvec_install() in kvm_guest_init(),
>> which is called in setup_arch() but way before trap_init(). IOW,
>> there is a gap between FRED is available and available but disabled.
>> As a result, when FRED is available but disabled, its IDT handler
>> is not installed thus spurious_interrupt() will be invoked.
>>
>> Fix it by parsing cmdline param "fred=" in cpu_parse_early_param()
>> to minimize the gap between FRED is available and available but
>> disabled.
>>
>> Fixes: 3810da12710a ("x86/fred: Add a fred= cmdline param")
>> Reported-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
>> Suggested-by: Thomas Gleixner <tglx@linutronix.de>
>> Signed-off-by: Xin Li (Intel) <xin@zytor.com>
>> ---
>> arch/x86/kernel/cpu/common.c | 5 +++++
>> arch/x86/kernel/traps.c | 26 --------------------------
>> 2 files changed, 5 insertions(+), 26 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
>> index d4e539d4e158..9a904fe7c829 100644
>> --- a/arch/x86/kernel/cpu/common.c
>> +++ b/arch/x86/kernel/cpu/common.c
>> @@ -1510,6 +1510,11 @@ static void __init cpu_parse_early_param(void)
>> if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
>> setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
>> + /* Minimize the gap between FRED is available and available but
>> disabled. */
>> + arglen = cmdline_find_option(boot_command_line, "fred", arg,
>> sizeof(arg));
>> + if (arglen != strlen("on") || strcmp(arg, "on"))
>
>
> Just make this check :
>
> ret = cmdline_find_option(...)
> if (ret < 0 || strcmp()))
>
> A lot more straightforward
>
I'd do
if (arglen != 2 || strncmp(arg, "on", 2))
If arglen is 2, strncmp(arg, "on", 2) is essentially strcmp(arg, "on"),
but I should've kept using strncmp.
^ permalink raw reply [flat|nested] 27+ messages in thread
* [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 8:54 [PATCH v1 0/4] Enable FRED earlier Xin Li (Intel)
2024-07-03 8:54 ` [PATCH v1 1/4] x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param() Xin Li (Intel)
@ 2024-07-03 8:54 ` Xin Li (Intel)
2024-07-03 15:43 ` Dave Hansen
2024-07-03 8:54 ` [PATCH v1 3/4] x86/fred: Split FRED RSP initialization into a separate function Xin Li (Intel)
2024-07-03 8:54 ` [PATCH v1 4/4] x86/fred: Enable FRED right after init_mem_mapping() Xin Li (Intel)
3 siblings, 1 reply; 27+ messages in thread
From: Xin Li (Intel) @ 2024-07-03 8:54 UTC (permalink / raw)
To: linux-kernel
Cc: hpa, tglx, mingo, bp, dave.hansen, x86, peterz, andrew.cooper3,
nik.borisov, houwenlong.hwl
Do FRED MSR writes with wrmsrns() rather than wrmsrl().
No functional change intended.
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
arch/x86/kernel/fred.c | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
index 4bcd8791ad96..b202685b8e77 100644
--- a/arch/x86/kernel/fred.c
+++ b/arch/x86/kernel/fred.c
@@ -26,27 +26,27 @@ void cpu_init_fred_exceptions(void)
/* When FRED is enabled by default, remove this log message */
pr_info("Initialize FRED on CPU%d\n", smp_processor_id());
- wrmsrl(MSR_IA32_FRED_CONFIG,
- /* Reserve for CALL emulation */
- FRED_CONFIG_REDZONE |
- FRED_CONFIG_INT_STKLVL(0) |
- FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
+ wrmsrns(MSR_IA32_FRED_CONFIG,
+ /* Reserve for CALL emulation */
+ FRED_CONFIG_REDZONE |
+ FRED_CONFIG_INT_STKLVL(0) |
+ FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
/*
* The purpose of separate stacks for NMI, #DB and #MC *in the kernel*
* (remember that user space faults are always taken on stack level 0)
* is to avoid overflowing the kernel stack.
*/
- wrmsrl(MSR_IA32_FRED_STKLVLS,
- FRED_STKLVL(X86_TRAP_DB, FRED_DB_STACK_LEVEL) |
- FRED_STKLVL(X86_TRAP_NMI, FRED_NMI_STACK_LEVEL) |
- FRED_STKLVL(X86_TRAP_MC, FRED_MC_STACK_LEVEL) |
- FRED_STKLVL(X86_TRAP_DF, FRED_DF_STACK_LEVEL));
+ wrmsrns(MSR_IA32_FRED_STKLVLS,
+ FRED_STKLVL(X86_TRAP_DB, FRED_DB_STACK_LEVEL) |
+ FRED_STKLVL(X86_TRAP_NMI, FRED_NMI_STACK_LEVEL) |
+ FRED_STKLVL(X86_TRAP_MC, FRED_MC_STACK_LEVEL) |
+ FRED_STKLVL(X86_TRAP_DF, FRED_DF_STACK_LEVEL));
/* The FRED equivalents to IST stacks... */
- wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
- wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
- wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
+ wrmsrns(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
+ wrmsrns(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
+ wrmsrns(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
/* Enable FRED */
cr4_set_bits(X86_CR4_FRED);
--
2.45.2
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 8:54 ` [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns() Xin Li (Intel)
@ 2024-07-03 15:43 ` Dave Hansen
2024-07-03 15:54 ` Borislav Petkov
0 siblings, 1 reply; 27+ messages in thread
From: Dave Hansen @ 2024-07-03 15:43 UTC (permalink / raw)
To: Xin Li (Intel), linux-kernel
Cc: hpa, tglx, mingo, bp, dave.hansen, x86, peterz, andrew.cooper3,
nik.borisov, houwenlong.hwl
On 7/3/24 01:54, Xin Li (Intel) wrote:
> Do FRED MSR writes with wrmsrns() rather than wrmsrl().
A longer changelog would be appreciated here. The wrmsrns() is
presumably to avoid the WRMSR serialization overhead and the CR4 write
provides all of the serialization that we need.
I'm also not clear how this fits into the series other than being FRED
related.
> No functional change intended.
I think I know what this was trying to convey, but I'm not sure this
phrase is appropriate to use here. Sure, WRMSR and WRMSRNS have the
same general purpose, but I'd never say they are functionally equivalent.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 15:43 ` Dave Hansen
@ 2024-07-03 15:54 ` Borislav Petkov
2024-07-03 16:00 ` Andrew Cooper
0 siblings, 1 reply; 27+ messages in thread
From: Borislav Petkov @ 2024-07-03 15:54 UTC (permalink / raw)
To: dave.hansen
Cc: xin, linux-kernel, hpa, tglx, mingo, dave.hansen, x86, peterz,
andrew.cooper3, nik.borisov, houwenlong.hwl
Dave Hansen <dave.hansen@intel.com> wrote:
>On 7/3/24 01:54, Xin Li (Intel) wrote:
>> Do FRED MSR writes with wrmsrns() rather than wrmsrl().
>
>A longer changelog would be appreciated here. The wrmsrns() is
>presumably to avoid the WRMSR serialization overhead and the CR4 write
>provides all of the serialization that we need.
Also, all those wrmsrns() writes better be behind a CPUID check.
Thx.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 15:54 ` Borislav Petkov
@ 2024-07-03 16:00 ` Andrew Cooper
2024-07-03 16:06 ` H. Peter Anvin
2024-07-05 9:28 ` Peter Zijlstra
0 siblings, 2 replies; 27+ messages in thread
From: Andrew Cooper @ 2024-07-03 16:00 UTC (permalink / raw)
To: Borislav Petkov, dave.hansen
Cc: xin, linux-kernel, hpa, tglx, mingo, dave.hansen, x86, peterz,
nik.borisov, houwenlong.hwl
On 03/07/2024 4:54 pm, Borislav Petkov wrote:
> Dave Hansen <dave.hansen@intel.com> wrote:
>> On 7/3/24 01:54, Xin Li (Intel) wrote:
>> > Do FRED MSR writes with wrmsrns() rather than wrmsrl().
>>
>> A longer changelog would be appreciated here. The wrmsrns() is
>> presumably to avoid the WRMSR serialization overhead and the CR4 write
>> provides all of the serialization that we need.
> Also, all those wrmsrns() writes better be behind a CPUID check.
They're not, in Linux.
For the $N'th time, here is the primitive that Linux wants to stea^w
borrow for this to be sane.
/* Non-serialising WRMSR, when available. Falls back to a serialising
WRMSR. */
static inline void wrmsrns(uint32_t msr, uint32_t lo, uint32_t hi)
{
/*
* WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant CS
* prefix to avoid a trailing NOP.
*/
alternative_input(".byte 0x2e; wrmsr",
".byte 0x0f,0x01,0xc6", X86_FEATURE_WRMSRNS,
"c" (msr), "a" (lo), "d" (hi));
}
~Andrew
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 16:00 ` Andrew Cooper
@ 2024-07-03 16:06 ` H. Peter Anvin
2024-07-03 16:17 ` Borislav Petkov
` (2 more replies)
2024-07-05 9:28 ` Peter Zijlstra
1 sibling, 3 replies; 27+ messages in thread
From: H. Peter Anvin @ 2024-07-03 16:06 UTC (permalink / raw)
To: Andrew Cooper, Borislav Petkov, dave.hansen
Cc: xin, linux-kernel, tglx, mingo, dave.hansen, x86, peterz,
nik.borisov, houwenlong.hwl
On July 3, 2024 9:00:53 AM PDT, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>On 03/07/2024 4:54 pm, Borislav Petkov wrote:
>> Dave Hansen <dave.hansen@intel.com> wrote:
>>> On 7/3/24 01:54, Xin Li (Intel) wrote:
>>> > Do FRED MSR writes with wrmsrns() rather than wrmsrl().
>>>
>>> A longer changelog would be appreciated here. The wrmsrns() is
>>> presumably to avoid the WRMSR serialization overhead and the CR4 write
>>> provides all of the serialization that we need.
>> Also, all those wrmsrns() writes better be behind a CPUID check.
>
>They're not, in Linux.
>
>For the $N'th time, here is the primitive that Linux wants to stea^w
>borrow for this to be sane.
>
>/* Non-serialising WRMSR, when available. Falls back to a serialising
>WRMSR. */
>static inline void wrmsrns(uint32_t msr, uint32_t lo, uint32_t hi)
>{
> /*
> * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant CS
> * prefix to avoid a trailing NOP.
> */
> alternative_input(".byte 0x2e; wrmsr",
> ".byte 0x0f,0x01,0xc6", X86_FEATURE_WRMSRNS,
> "c" (msr), "a" (lo), "d" (hi));
>}
>
>~Andrew
I believe tglx declared to use them unconditionally since FRED depends on WRMSRNS (and the kernel enforces that.)
Using an alternative would make wrmsrns() a more useful construct in general, though.
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 16:06 ` H. Peter Anvin
@ 2024-07-03 16:17 ` Borislav Petkov
2024-07-05 2:45 ` H. Peter Anvin
2024-07-03 16:18 ` Andrew Cooper
2024-07-03 16:43 ` Dave Hansen
2 siblings, 1 reply; 27+ messages in thread
From: Borislav Petkov @ 2024-07-03 16:17 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Andrew Cooper, dave.hansen, xin, linux-kernel, tglx, mingo,
dave.hansen, x86, peterz, nik.borisov, houwenlong.hwl
On Wed, Jul 03, 2024 at 09:06:55AM -0700, H. Peter Anvin wrote:
> I believe tglx declared to use them unconditionally since FRED depends on
> WRMSRNS (and the kernel enforces that.)
>
> Using an alternative would make wrmsrns() a more useful construct in
> general, though.
We can't use them unconditionally and we don't need an alternative just for
that - a simple
if (cpu_feature_enabled(X86_FEATURE_WRMSRNS))
wrmsrns()
else
wrmsr()
would be perfectly fine.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 16:17 ` Borislav Petkov
@ 2024-07-05 2:45 ` H. Peter Anvin
2024-07-05 9:44 ` Borislav Petkov
0 siblings, 1 reply; 27+ messages in thread
From: H. Peter Anvin @ 2024-07-05 2:45 UTC (permalink / raw)
To: Borislav Petkov
Cc: Andrew Cooper, dave.hansen, xin, linux-kernel, tglx, mingo,
dave.hansen, x86, peterz, nik.borisov, houwenlong.hwl
On July 3, 2024 9:17:05 AM PDT, Borislav Petkov <bp@alien8.de> wrote:
>On Wed, Jul 03, 2024 at 09:06:55AM -0700, H. Peter Anvin wrote:
>> I believe tglx declared to use them unconditionally since FRED depends on
>> WRMSRNS (and the kernel enforces that.)
>>
>> Using an alternative would make wrmsrns() a more useful construct in
>> general, though.
>
>We can't use them unconditionally and we don't need an alternative just for
>that - a simple
>
> if (cpu_feature_enabled(X86_FEATURE_WRMSRNS))
> wrmsrns()
> else
> wrmsr()
>
>would be perfectly fine.
>
Except that that would be more cleanly spelled wrmsrns()... let's just abstract the whole thing away and let the user use wrmsrns() whenever serialization is not necessary.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-05 2:45 ` H. Peter Anvin
@ 2024-07-05 9:44 ` Borislav Petkov
2024-07-05 10:30 ` Andrew Cooper
0 siblings, 1 reply; 27+ messages in thread
From: Borislav Petkov @ 2024-07-05 9:44 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Andrew Cooper, dave.hansen, xin, linux-kernel, tglx, mingo,
dave.hansen, x86, peterz, nik.borisov, houwenlong.hwl
On Thu, Jul 04, 2024 at 07:45:00PM -0700, H. Peter Anvin wrote:
> Except that that would be more cleanly spelled wrmsrns()... let's just
> abstract the whole thing away and let the user use wrmsrns() whenever
> serialization is not necessary.
Just don't make it more complex and unreadable than it has to be.
cpu_feature_enabled() already is patching things for optimal perf so even if
PeterZ prefers the alternative, I say it is ugly and, more importantly,
unnecessary.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-05 9:44 ` Borislav Petkov
@ 2024-07-05 10:30 ` Andrew Cooper
2024-07-05 13:45 ` Borislav Petkov
0 siblings, 1 reply; 27+ messages in thread
From: Andrew Cooper @ 2024-07-05 10:30 UTC (permalink / raw)
To: Borislav Petkov, H. Peter Anvin
Cc: dave.hansen, xin, linux-kernel, tglx, mingo, dave.hansen, x86,
peterz, nik.borisov, houwenlong.hwl, Juergen Gross
On 05/07/2024 10:44 am, Borislav Petkov wrote:
> On Thu, Jul 04, 2024 at 07:45:00PM -0700, H. Peter Anvin wrote:
>> Except that that would be more cleanly spelled wrmsrns()... let's just
>> abstract the whole thing away and let the user use wrmsrns() whenever
>> serialization is not necessary.
> Just don't make it more complex and unreadable than it has to be.
>
> cpu_feature_enabled() already is patching things for optimal perf so even if
> PeterZ prefers the alternative, I say it is ugly and, more importantly,
> unnecessary.
You cite perf. Look at the disassembly of the two approaches...
cpu_feature_enabled() might give you warm fuzzy feelings that you've
eekd out every ounce of performance, but it's an absolute disaster at a
code generation level by forcing the compiler to lay out both side and
preventing any kind of CSE. As I've reported before, count the number
of RDPKRU instructions in trivial-looking xsave handling functions for a
glimpse of the practical consequences.
Anyway, none of this is the complicated aspect. The complicated issue
is the paravirt wrmsr().
TGLX's complaint is that everyone turns on CONFIG_PARAVIRT, and the
paravirt hook for wmsr() is a code generation disaster WRT parameter
handling. I agree that it's not great, although it's got nothing on the
damage done by cpu_feature_enabled().
But, seeing as I've got everyone's attention, I'll repeat my proposal
for fixing this nicely, in the hope of any feedback on the 3rd posting...
The underlying problem is that parameter setup for the paravirt wrmsr()
follows a C calling convention, so the index/data are manifested into
%rdi/%rsi. Then, the out-line "native" hook shuffles the index/data
back into %ecx/%edx/%eax, and this cost is borne in all kernels.
Instead, the better way would be to have a hook with a non-standard
calling convention which happens to match the WRMSR instruction.
That way, the native, and simple paravirt paths inline to a single
instruction with no extraneous parameter shuffling, and the shuffling
cost is borne by PARAVIRT_XXL only, where a reg/reg move is nothing
compared to the hypercall involved.
The only complication is the extable #GP hook, but that's fine to place
at the paravirt site as long as the extable handler confirms the #GP
came from a WRMSR{NS,} and not a branch.
~Andrew
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-05 10:30 ` Andrew Cooper
@ 2024-07-05 13:45 ` Borislav Petkov
2024-07-09 13:58 ` Xin Li
0 siblings, 1 reply; 27+ messages in thread
From: Borislav Petkov @ 2024-07-05 13:45 UTC (permalink / raw)
To: Andrew Cooper
Cc: H. Peter Anvin, dave.hansen, xin, linux-kernel, tglx, mingo,
dave.hansen, x86, peterz, nik.borisov, houwenlong.hwl,
Juergen Gross
On Fri, Jul 05, 2024 at 11:30:16AM +0100, Andrew Cooper wrote:
> You cite perf. Look at the disassembly of the two approaches...
>
> cpu_feature_enabled() might give you warm fuzzy feelings that you've
> eekd out every ounce of performance, but it's an absolute disaster at a
> code generation level by forcing the compiler to lay out both side and
> preventing any kind of CSE. As I've reported before, count the number
> of RDPKRU instructions in trivial-looking xsave handling functions for a
> glimpse of the practical consequences.
Yes, I do cite perf because what you have above is not saying: "yes, this is
a fast path and doing an alternative is warranted." If that is the case, sure,
by all means. If not, make the C readable and ignore code generation. Who
cares.
> Anyway, none of this is the complicated aspect. The complicated issue
> is the paravirt wrmsr().
>
> TGLX's complaint is that everyone turns on CONFIG_PARAVIRT, and the
> paravirt hook for wmsr() is a code generation disaster WRT parameter
> handling. I agree that it's not great, although it's got nothing on the
> damage done by cpu_feature_enabled().
>
>
> But, seeing as I've got everyone's attention, I'll repeat my proposal
> for fixing this nicely, in the hope of any feedback on the 3rd posting...
>
> The underlying problem is that parameter setup for the paravirt wrmsr()
> follows a C calling convention, so the index/data are manifested into
> %rdi/%rsi. Then, the out-line "native" hook shuffles the index/data
> back into %ecx/%edx/%eax, and this cost is borne in all kernels.
A handful of reg ops per a WRMSRNS? Meh, same argument as above. But...
> Instead, the better way would be to have a hook with a non-standard
> calling convention which happens to match the WRMSR instruction.
>
> That way, the native, and simple paravirt paths inline to a single
> instruction with no extraneous parameter shuffling, and the shuffling
> cost is borne by PARAVIRT_XXL only, where a reg/reg move is nothing
> compared to the hypercall involved.
>
> The only complication is the extable #GP hook, but that's fine to place
> at the paravirt site as long as the extable handler confirms the #GP
> came from a WRMSR{NS,} and not a branch.
... yes, I'd gladly review patches which address that and make the whole deal
cleaner. I'm still sceptical those handful of regs shuffling ops would matter
in any benchmark but sure, if it can be done in a cleaner way, why not...
Unless I'm missing some use case where that overhead really matters. Then by
all means...
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-05 13:45 ` Borislav Petkov
@ 2024-07-09 13:58 ` Xin Li
2024-07-09 20:51 ` H. Peter Anvin
0 siblings, 1 reply; 27+ messages in thread
From: Xin Li @ 2024-07-09 13:58 UTC (permalink / raw)
To: Borislav Petkov, Andrew Cooper
Cc: H. Peter Anvin, dave.hansen, linux-kernel, tglx, mingo,
dave.hansen, x86, peterz, nik.borisov, houwenlong.hwl,
Juergen Gross
On 7/5/2024 6:45 AM, Borislav Petkov wrote:
> On Fri, Jul 05, 2024 at 11:30:16AM +0100, Andrew Cooper wrote:
>> You cite perf. Look at the disassembly of the two approaches...
>>
>> cpu_feature_enabled() might give you warm fuzzy feelings that you've
>> eekd out every ounce of performance, but it's an absolute disaster at a
>> code generation level by forcing the compiler to lay out both side and
>> preventing any kind of CSE. As I've reported before, count the number
>> of RDPKRU instructions in trivial-looking xsave handling functions for a
>> glimpse of the practical consequences.
>
> Yes, I do cite perf because what you have above is not saying: "yes, this is
> a fast path and doing an alternative is warranted." If that is the case, sure,
> by all means. If not, make the C readable and ignore code generation. Who
> cares.
>
>> Anyway, none of this is the complicated aspect. The complicated issue
>> is the paravirt wrmsr().
>>
>> TGLX's complaint is that everyone turns on CONFIG_PARAVIRT, and the
>> paravirt hook for wmsr() is a code generation disaster WRT parameter
>> handling. I agree that it's not great, although it's got nothing on the
>> damage done by cpu_feature_enabled().
>>
>>
>> But, seeing as I've got everyone's attention, I'll repeat my proposal
>> for fixing this nicely, in the hope of any feedback on the 3rd posting...
>>
>> The underlying problem is that parameter setup for the paravirt wrmsr()
>> follows a C calling convention, so the index/data are manifested into
>> %rdi/%rsi. Then, the out-line "native" hook shuffles the index/data
>> back into %ecx/%edx/%eax, and this cost is borne in all kernels.
>
> A handful of reg ops per a WRMSRNS? Meh, same argument as above. But...
>
>> Instead, the better way would be to have a hook with a non-standard
>> calling convention which happens to match the WRMSR instruction.
>>
>> That way, the native, and simple paravirt paths inline to a single
>> instruction with no extraneous parameter shuffling, and the shuffling
>> cost is borne by PARAVIRT_XXL only, where a reg/reg move is nothing
>> compared to the hypercall involved.
>>
>> The only complication is the extable #GP hook, but that's fine to place
>> at the paravirt site as long as the extable handler confirms the #GP
>> came from a WRMSR{NS,} and not a branch.
>
> ... yes, I'd gladly review patches which address that and make the whole deal
> cleaner. I'm still sceptical those handful of regs shuffling ops would matter
> in any benchmark but sure, if it can be done in a cleaner way, why not...
>
> Unless I'm missing some use case where that overhead really matters. Then by
> all means...
>
It looks that it no longer makes sense to include this patch in this
patchset; it is not something that can be done as a small cleanup.
Any objection?
Thanks!
Xin
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-09 13:58 ` Xin Li
@ 2024-07-09 20:51 ` H. Peter Anvin
0 siblings, 0 replies; 27+ messages in thread
From: H. Peter Anvin @ 2024-07-09 20:51 UTC (permalink / raw)
To: Xin Li, Borislav Petkov, Andrew Cooper
Cc: dave.hansen, linux-kernel, tglx, mingo, dave.hansen, x86, peterz,
nik.borisov, houwenlong.hwl, Juergen Gross
On July 9, 2024 6:58:07 AM PDT, Xin Li <xin@zytor.com> wrote:
>On 7/5/2024 6:45 AM, Borislav Petkov wrote:
>> On Fri, Jul 05, 2024 at 11:30:16AM +0100, Andrew Cooper wrote:
>>> You cite perf. Look at the disassembly of the two approaches...
>>>
>>> cpu_feature_enabled() might give you warm fuzzy feelings that you've
>>> eekd out every ounce of performance, but it's an absolute disaster at a
>>> code generation level by forcing the compiler to lay out both side and
>>> preventing any kind of CSE. As I've reported before, count the number
>>> of RDPKRU instructions in trivial-looking xsave handling functions for a
>>> glimpse of the practical consequences.
>>
>> Yes, I do cite perf because what you have above is not saying: "yes, this is
>> a fast path and doing an alternative is warranted." If that is the case, sure,
>> by all means. If not, make the C readable and ignore code generation. Who
>> cares.
>>
>>> Anyway, none of this is the complicated aspect. The complicated issue
>>> is the paravirt wrmsr().
>>>
>>> TGLX's complaint is that everyone turns on CONFIG_PARAVIRT, and the
>>> paravirt hook for wmsr() is a code generation disaster WRT parameter
>>> handling. I agree that it's not great, although it's got nothing on the
>>> damage done by cpu_feature_enabled().
>>>
>>>
>>> But, seeing as I've got everyone's attention, I'll repeat my proposal
>>> for fixing this nicely, in the hope of any feedback on the 3rd posting...
>>>
>>> The underlying problem is that parameter setup for the paravirt wrmsr()
>>> follows a C calling convention, so the index/data are manifested into
>>> %rdi/%rsi. Then, the out-line "native" hook shuffles the index/data
>>> back into %ecx/%edx/%eax, and this cost is borne in all kernels.
>>
>> A handful of reg ops per a WRMSRNS? Meh, same argument as above. But...
>>
>>> Instead, the better way would be to have a hook with a non-standard
>>> calling convention which happens to match the WRMSR instruction.
>>>
>>> That way, the native, and simple paravirt paths inline to a single
>>> instruction with no extraneous parameter shuffling, and the shuffling
>>> cost is borne by PARAVIRT_XXL only, where a reg/reg move is nothing
>>> compared to the hypercall involved.
>>>
>>> The only complication is the extable #GP hook, but that's fine to place
>>> at the paravirt site as long as the extable handler confirms the #GP
>>> came from a WRMSR{NS,} and not a branch.
>>
>> ... yes, I'd gladly review patches which address that and make the whole deal
>> cleaner. I'm still sceptical those handful of regs shuffling ops would matter
>> in any benchmark but sure, if it can be done in a cleaner way, why not...
>>
>> Unless I'm missing some use case where that overhead really matters. Then by
>> all means...
>>
>
>It looks that it no longer makes sense to include this patch in this
>patchset; it is not something that can be done as a small cleanup.
>
>Any objection?
>
>Thanks!
> Xin
I agree, this is desirable but separate.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 16:06 ` H. Peter Anvin
2024-07-03 16:17 ` Borislav Petkov
@ 2024-07-03 16:18 ` Andrew Cooper
2024-07-04 5:57 ` Xin Li
2024-07-03 16:43 ` Dave Hansen
2 siblings, 1 reply; 27+ messages in thread
From: Andrew Cooper @ 2024-07-03 16:18 UTC (permalink / raw)
To: H. Peter Anvin, Borislav Petkov, dave.hansen
Cc: xin, linux-kernel, tglx, mingo, dave.hansen, x86, peterz,
nik.borisov, houwenlong.hwl
On 03/07/2024 5:06 pm, H. Peter Anvin wrote:
> On July 3, 2024 9:00:53 AM PDT, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> On 03/07/2024 4:54 pm, Borislav Petkov wrote:
>>> Dave Hansen <dave.hansen@intel.com> wrote:
>>>> On 7/3/24 01:54, Xin Li (Intel) wrote:
>>>> > Do FRED MSR writes with wrmsrns() rather than wrmsrl().
>>>>
>>>> A longer changelog would be appreciated here. The wrmsrns() is
>>>> presumably to avoid the WRMSR serialization overhead and the CR4 write
>>>> provides all of the serialization that we need.
>>> Also, all those wrmsrns() writes better be behind a CPUID check.
>> They're not, in Linux.
>>
>> For the $N'th time, here is the primitive that Linux wants to stea^w
>> borrow for this to be sane.
>>
>> /* Non-serialising WRMSR, when available. Falls back to a serialising
>> WRMSR. */
>> static inline void wrmsrns(uint32_t msr, uint32_t lo, uint32_t hi)
>> {
>> /*
>> * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant CS
>> * prefix to avoid a trailing NOP.
>> */
>> alternative_input(".byte 0x2e; wrmsr",
>> ".byte 0x0f,0x01,0xc6", X86_FEATURE_WRMSRNS,
>> "c" (msr), "a" (lo), "d" (hi));
>> }
>>
>> ~Andrew
> I believe tglx declared to use them unconditionally since FRED depends on WRMSRNS (and the kernel enforces that.)
I know that Linux has chosen to have this as a software-enforced
requirement.
The dependency does not exist architecturally, and just because it
happens to be true on Intel processors doesn't mean it's true of other
implementations.
~Andrew
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 16:18 ` Andrew Cooper
@ 2024-07-04 5:57 ` Xin Li
2024-07-04 7:58 ` H. Peter Anvin
2024-07-04 8:23 ` Borislav Petkov
0 siblings, 2 replies; 27+ messages in thread
From: Xin Li @ 2024-07-04 5:57 UTC (permalink / raw)
To: Andrew Cooper, H. Peter Anvin, Borislav Petkov, dave.hansen
Cc: linux-kernel, tglx, mingo, dave.hansen, x86, peterz, nik.borisov,
houwenlong.hwl
On 7/3/2024 9:18 AM, Andrew Cooper wrote:
> On 03/07/2024 5:06 pm, H. Peter Anvin wrote:
>> I believe tglx declared53 to use them unconditionally since FRED depends on WRMSRNS (and the kernel enforces that.)
>
> I know that Linux has chosen to have this as a software-enforced
> requirement.
>
> The dependency does not exist architecturally, and just because it
> happens to be true on Intel processors doesn't mean it's true of other
> implementations.
Won't it be way better if we could have all x86 vendors agree on it?
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-04 5:57 ` Xin Li
@ 2024-07-04 7:58 ` H. Peter Anvin
2024-07-04 8:23 ` Borislav Petkov
1 sibling, 0 replies; 27+ messages in thread
From: H. Peter Anvin @ 2024-07-04 7:58 UTC (permalink / raw)
To: Xin Li, Andrew Cooper, Borislav Petkov, dave.hansen
Cc: linux-kernel, tglx, mingo, dave.hansen, x86, peterz, nik.borisov,
houwenlong.hwl
On July 3, 2024 10:57:29 PM PDT, Xin Li <xin@zytor.com> wrote:
>On 7/3/2024 9:18 AM, Andrew Cooper wrote:
>> On 03/07/2024 5:06 pm, H. Peter Anvin wrote:
>>> I believe tglx declared53 to use them unconditionally since FRED depends on WRMSRNS (and the kernel enforces that.)
>>
>> I know that Linux has chosen to have this as a software-enforced
>> requirement.
>>
>> The dependency does not exist architecturally, and just because it
>> happens to be true on Intel processors doesn't mean it's true of other
>> implementations.
>
>Won't it be way better if we could have all x86 vendors agree on it?
>
>
It was tglx' explicit request to keep it simple unless someone objects. All it would take is for someone to come up with a countercase.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-04 5:57 ` Xin Li
2024-07-04 7:58 ` H. Peter Anvin
@ 2024-07-04 8:23 ` Borislav Petkov
2024-07-04 8:28 ` H. Peter Anvin
1 sibling, 1 reply; 27+ messages in thread
From: Borislav Petkov @ 2024-07-04 8:23 UTC (permalink / raw)
To: Xin Li
Cc: Andrew Cooper, H. Peter Anvin, dave.hansen, linux-kernel, tglx,
mingo, dave.hansen, x86, peterz, nik.borisov, houwenlong.hwl
On Wed, Jul 03, 2024 at 10:57:29PM -0700, Xin Li wrote:
> Won't it be way better if we could have all x86 vendors agree on it?
It would be way better if you simply do it as I suggested and stop debating.
One fine day you'll know why I'm adamant about it.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-04 8:23 ` Borislav Petkov
@ 2024-07-04 8:28 ` H. Peter Anvin
0 siblings, 0 replies; 27+ messages in thread
From: H. Peter Anvin @ 2024-07-04 8:28 UTC (permalink / raw)
To: Borislav Petkov, Xin Li
Cc: Andrew Cooper, dave.hansen, linux-kernel, tglx, mingo,
dave.hansen, x86, peterz, nik.borisov, houwenlong.hwl
On July 4, 2024 1:23:43 AM PDT, Borislav Petkov <bp@alien8.de> wrote:
>On Wed, Jul 03, 2024 at 10:57:29PM -0700, Xin Li wrote:
>> Won't it be way better if we could have all x86 vendors agree on it?
>
>It would be way better if you simply do it as I suggested and stop debating.
>One fine day you'll know why I'm adamant about it.
>
>Thx.
>
I think that counts as an objection, so let's do it.
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 16:06 ` H. Peter Anvin
2024-07-03 16:17 ` Borislav Petkov
2024-07-03 16:18 ` Andrew Cooper
@ 2024-07-03 16:43 ` Dave Hansen
2024-07-03 22:48 ` Xin Li
2 siblings, 1 reply; 27+ messages in thread
From: Dave Hansen @ 2024-07-03 16:43 UTC (permalink / raw)
To: H. Peter Anvin, Andrew Cooper, Borislav Petkov
Cc: xin, linux-kernel, tglx, mingo, dave.hansen, x86, peterz,
nik.borisov, houwenlong.hwl
On 7/3/24 09:06, H. Peter Anvin wrote:
> I believe tglx declared to use them unconditionally since FRED> depends on WRMSRNS (and the kernel enforces that.)
Ahh, I forgot about:
static const struct cpuid_dep cpuid_deps[] = {
...
{ X86_FEATURE_FRED, X86_FEATURE_WRMSRNS },
So, yeah, Xin's patch here is quite safe and when Boris said:
> Also, all those wrmsrns() writes better be behind a CPUID check.
... it *is* behind a CPUID check, but it's an implicit one.
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 16:43 ` Dave Hansen
@ 2024-07-03 22:48 ` Xin Li
0 siblings, 0 replies; 27+ messages in thread
From: Xin Li @ 2024-07-03 22:48 UTC (permalink / raw)
To: Dave Hansen, H. Peter Anvin, Andrew Cooper, Borislav Petkov
Cc: linux-kernel, tglx, mingo, dave.hansen, x86, peterz, nik.borisov,
houwenlong.hwl
On 7/3/2024 9:43 AM, Dave Hansen wrote:
> On 7/3/24 09:06, H. Peter Anvin wrote:
>> I believe tglx declared to use them unconditionally since FRED> depends on WRMSRNS (and the kernel enforces that.)
>
> Ahh, I forgot about:
>
> static const struct cpuid_dep cpuid_deps[] = {
> ...
> { X86_FEATURE_FRED, X86_FEATURE_WRMSRNS },
>
> So, yeah, Xin's patch here is quite safe and when Boris said:
>
>> Also, all those wrmsrns() writes better be behind a CPUID check.
>
> ... it *is* behind a CPUID check, but it's an implicit one.
>
Right!
Otherwise typically there will be two feature checks in a line in the
source code (the run time code is binary patched):
if (cpu_feature_enabled(X86_FEATURE_FRED)) {
if (cpu_feature_enabled(X86_FEATURE_WRMSRNS))
wrmsrns(MSR_IA32_FRED_RSP0, ...);
else
wrmsrl(MSR_IA32_FRED_RSP0, ...);
}
Yes, Andrew's idea to use alternative_input() is a clean approach that
everyone has agreed. However there is a more fundamental problem with
how WRMSR instruction is being used in the existing code:
https://lore.kernel.org/lkml/87y1h81ht4.ffs@tglx/.
My rough understanding is that first we want something like:
alternative_input("call paravirt_write_msr", "wrmsr", X86_FEATURE_XENPV);
to get PVOPS out of the picture, and then apply alternative_input() as
Andrew proposed.
I thought about giving it a shot, but it never comes to the top of my
task list.
Thanks!
Xin
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-03 16:00 ` Andrew Cooper
2024-07-03 16:06 ` H. Peter Anvin
@ 2024-07-05 9:28 ` Peter Zijlstra
2024-07-05 11:33 ` H. Peter Anvin
1 sibling, 1 reply; 27+ messages in thread
From: Peter Zijlstra @ 2024-07-05 9:28 UTC (permalink / raw)
To: Andrew Cooper
Cc: Borislav Petkov, dave.hansen, xin, linux-kernel, hpa, tglx, mingo,
dave.hansen, x86, nik.borisov, houwenlong.hwl
On Wed, Jul 03, 2024 at 05:00:53PM +0100, Andrew Cooper wrote:
> /* Non-serialising WRMSR, when available. Falls back to a serialising WRMSR. */
> static inline void wrmsrns(uint32_t msr, uint32_t lo, uint32_t hi)
> {
> /*
> * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant CS
> * prefix to avoid a trailing NOP.
> */
> alternative_input(".byte 0x2e; wrmsr",
> ".byte 0x0f,0x01,0xc6", X86_FEATURE_WRMSRNS,
> "c" (msr), "a" (lo), "d" (hi));
> }
FWIW, I favour this variant.
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns()
2024-07-05 9:28 ` Peter Zijlstra
@ 2024-07-05 11:33 ` H. Peter Anvin
0 siblings, 0 replies; 27+ messages in thread
From: H. Peter Anvin @ 2024-07-05 11:33 UTC (permalink / raw)
To: Peter Zijlstra, Andrew Cooper
Cc: Borislav Petkov, dave.hansen, xin, linux-kernel, tglx, mingo,
dave.hansen, x86, nik.borisov, houwenlong.hwl
On July 5, 2024 2:28:05 AM PDT, Peter Zijlstra <peterz@infradead.org> wrote:
>On Wed, Jul 03, 2024 at 05:00:53PM +0100, Andrew Cooper wrote:
>
>> /* Non-serialising WRMSR, when available. Falls back to a serialising WRMSR. */
>> static inline void wrmsrns(uint32_t msr, uint32_t lo, uint32_t hi)
>> {
>> /*
>> * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant CS
>> * prefix to avoid a trailing NOP.
>> */
>> alternative_input(".byte 0x2e; wrmsr",
>> ".byte 0x0f,0x01,0xc6", X86_FEATURE_WRMSRNS,
>> "c" (msr), "a" (lo), "d" (hi));
>> }
>
>FWIW, I favour this variant.
We normally use DS as the padding prefix.
^ permalink raw reply [flat|nested] 27+ messages in thread
* [PATCH v1 3/4] x86/fred: Split FRED RSP initialization into a separate function
2024-07-03 8:54 [PATCH v1 0/4] Enable FRED earlier Xin Li (Intel)
2024-07-03 8:54 ` [PATCH v1 1/4] x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param() Xin Li (Intel)
2024-07-03 8:54 ` [PATCH v1 2/4] x86/fred: Write to FRED MSRs with wrmsrns() Xin Li (Intel)
@ 2024-07-03 8:54 ` Xin Li (Intel)
2024-07-03 8:54 ` [PATCH v1 4/4] x86/fred: Enable FRED right after init_mem_mapping() Xin Li (Intel)
3 siblings, 0 replies; 27+ messages in thread
From: Xin Li (Intel) @ 2024-07-03 8:54 UTC (permalink / raw)
To: linux-kernel
Cc: hpa, tglx, mingo, bp, dave.hansen, x86, peterz, andrew.cooper3,
nik.borisov, houwenlong.hwl
To enable FRED earlier, split FRED RSP initialization into a separate
function, as they are initialized with memory from CPU entry areas,
thus their initialization has to be kept after setup_cpu_entry_areas().
No functional change intended.
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
arch/x86/include/asm/fred.h | 2 ++
arch/x86/kernel/cpu/common.c | 6 ++++--
arch/x86/kernel/fred.c | 28 +++++++++++++++++++---------
3 files changed, 25 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index e86c7ba32435..66d7dbe2d314 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -84,11 +84,13 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int
}
void cpu_init_fred_exceptions(void);
+void cpu_init_fred_rsps(void);
void fred_complete_exception_setup(void);
#else /* CONFIG_X86_FRED */
static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
static inline void cpu_init_fred_exceptions(void) { }
+static inline void cpu_init_fred_rsps(void) { }
static inline void fred_complete_exception_setup(void) { }
static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
#endif /* CONFIG_X86_FRED */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9a904fe7c829..022ae4ba7997 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2195,10 +2195,12 @@ void cpu_init_exception_handling(void)
/* GHCB needs to be setup to handle #VC. */
setup_ghcb();
- if (cpu_feature_enabled(X86_FEATURE_FRED))
+ if (cpu_feature_enabled(X86_FEATURE_FRED)) {
cpu_init_fred_exceptions();
- else
+ cpu_init_fred_rsps();
+ } else {
load_current_idt();
+ }
}
/*
diff --git a/arch/x86/kernel/fred.c b/arch/x86/kernel/fred.c
index b202685b8e77..1788f28754f3 100644
--- a/arch/x86/kernel/fred.c
+++ b/arch/x86/kernel/fred.c
@@ -32,6 +32,25 @@ void cpu_init_fred_exceptions(void)
FRED_CONFIG_INT_STKLVL(0) |
FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
+ wrmsrns(MSR_IA32_FRED_STKLVLS, 0);
+ wrmsrns(MSR_IA32_FRED_RSP0, 0);
+ wrmsrns(MSR_IA32_FRED_RSP1, 0);
+ wrmsrns(MSR_IA32_FRED_RSP2, 0);
+ wrmsrns(MSR_IA32_FRED_RSP3, 0);
+
+ /* Enable FRED */
+ cr4_set_bits(X86_CR4_FRED);
+ /* Any further IDT use is a bug */
+ idt_invalidate();
+
+ /* Use int $0x80 for 32-bit system calls in FRED mode */
+ setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+}
+
+/* Must be called after setup_cpu_entry_areas() */
+void cpu_init_fred_rsps(void)
+{
/*
* The purpose of separate stacks for NMI, #DB and #MC *in the kernel*
* (remember that user space faults are always taken on stack level 0)
@@ -47,13 +66,4 @@ void cpu_init_fred_exceptions(void)
wrmsrns(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
wrmsrns(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
wrmsrns(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
-
- /* Enable FRED */
- cr4_set_bits(X86_CR4_FRED);
- /* Any further IDT use is a bug */
- idt_invalidate();
-
- /* Use int $0x80 for 32-bit system calls in FRED mode */
- setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
- setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}
--
2.45.2
^ permalink raw reply related [flat|nested] 27+ messages in thread* [PATCH v1 4/4] x86/fred: Enable FRED right after init_mem_mapping()
2024-07-03 8:54 [PATCH v1 0/4] Enable FRED earlier Xin Li (Intel)
` (2 preceding siblings ...)
2024-07-03 8:54 ` [PATCH v1 3/4] x86/fred: Split FRED RSP initialization into a separate function Xin Li (Intel)
@ 2024-07-03 8:54 ` Xin Li (Intel)
3 siblings, 0 replies; 27+ messages in thread
From: Xin Li (Intel) @ 2024-07-03 8:54 UTC (permalink / raw)
To: linux-kernel
Cc: hpa, tglx, mingo, bp, dave.hansen, x86, peterz, andrew.cooper3,
nik.borisov, houwenlong.hwl
Enable FRED right after init_mem_mapping() to avoid #PF handler,
exc_page_fault(), fetching its faulting address from the stack
before FRED is enabled.
Fixes: 14619d912b65 ("x86/fred: FRED entry/exit and dispatch code")
Reported-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
arch/x86/kernel/cpu/common.c | 6 +-----
arch/x86/kernel/setup.c | 7 ++++++-
arch/x86/kernel/smpboot.c | 6 ++++++
arch/x86/kernel/traps.c | 4 ++++
4 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 022ae4ba7997..8d454b6f7def 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2195,12 +2195,8 @@ void cpu_init_exception_handling(void)
/* GHCB needs to be setup to handle #VC. */
setup_ghcb();
- if (cpu_feature_enabled(X86_FEATURE_FRED)) {
- cpu_init_fred_exceptions();
- cpu_init_fred_rsps();
- } else {
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
load_current_idt();
- }
}
/*
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 728927e4ba51..d866136a89ff 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -39,6 +39,7 @@
#include <asm/coco.h>
#include <asm/cpu.h>
#include <asm/efi.h>
+#include <asm/fred.h>
#include <asm/gart.h>
#include <asm/hypervisor.h>
#include <asm/io_apic.h>
@@ -1040,7 +1041,11 @@ void __init setup_arch(char **cmdline_p)
init_mem_mapping();
- idt_setup_early_pf();
+ /* Switch to FRED from early IDT ASAP */
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ cpu_init_fred_exceptions();
+ else
+ idt_setup_early_pf();
/*
* Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0c35207320cb..0d83377f9dcd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -64,6 +64,7 @@
#include <asm/acpi.h>
#include <asm/cacheinfo.h>
#include <asm/desc.h>
+#include <asm/fred.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/realmode.h>
@@ -248,6 +249,11 @@ static void notrace start_secondary(void *unused)
cpu_init_exception_handling();
+ if (cpu_feature_enabled(X86_FEATURE_FRED)) {
+ cpu_init_fred_exceptions();
+ cpu_init_fred_rsps();
+ }
+
/*
* Load the microcode before reaching the AP alive synchronization
* point below so it is not part of the full per CPU serialized
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 6afb41e6cbbb..81648bd07576 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1407,6 +1407,10 @@ void __init trap_init(void)
/* Init cpu_entry_area before IST entries are set up */
setup_cpu_entry_areas();
+ /* FRED RSPs pointing to memory from CPU entry areas */
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ cpu_init_fred_rsps();
+
/* Init GHCB memory pages when running as an SEV-ES guest */
sev_es_init_vc_handling();
--
2.45.2
^ permalink raw reply related [flat|nested] 27+ messages in thread