Re: [PATCHv3 14/14] x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method

linux-coco.lists.linux.dev archive mirror
 help / color / mirror / Atom feed

From: "Huang, Kai" <kai.huang@intel.com>
To: "kirill.shutemov@linux.intel.com"
	<kirill.shutemov@linux.intel.com>,
	"tglx@linutronix.de" <tglx@linutronix.de>,
	"mingo@redhat.com" <mingo@redhat.com>,
	"x86@kernel.org" <x86@kernel.org>, "bp@alien8.de" <bp@alien8.de>,
	"dave.hansen@linux.intel.com" <dave.hansen@linux.intel.com>
Cc: "Edgecombe, Rick P" <rick.p.edgecombe@intel.com>,
	"Reshetova, Elena" <elena.reshetova@intel.com>,
	"Nakajima, Jun" <jun.nakajima@intel.com>,
	"rafael@kernel.org" <rafael@kernel.org>,
	"peterz@infradead.org" <peterz@infradead.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"sathyanarayanan.kuppuswamy@linux.intel.com"
	<sathyanarayanan.kuppuswamy@linux.intel.com>,
	"Hunter, Adrian" <adrian.hunter@intel.com>,
	"thomas.lendacky@amd.com" <thomas.lendacky@amd.com>,
	"ashish.kalra@amd.com" <ashish.kalra@amd.com>,
	"kexec@lists.infradead.org" <kexec@lists.infradead.org>,
	"seanjc@google.com" <seanjc@google.com>,
	"bhe@redhat.com" <bhe@redhat.com>,
	"linux-coco@lists.linux.dev" <linux-coco@lists.linux.dev>
Subject: Re: [PATCHv3 14/14] x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method
Date: Thu, 23 Nov 2023 09:38:13 +0000	[thread overview]
Message-ID: <7012ba92206efaa6f0a0a2e1a28355d67d55265a.camel@intel.com> (raw)
In-Reply-To: <20231115120044.8034-15-kirill.shutemov@linux.intel.com>


> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
> index 171d86fe71ef..602b5d3982ff 100644
> --- a/arch/x86/kernel/acpi/boot.c
> +++ b/arch/x86/kernel/acpi/boot.c
> @@ -22,6 +22,7 @@
>  #include <linux/efi-bgrt.h>
>  #include <linux/serial_core.h>
>  #include <linux/pgtable.h>
> +#include <linux/sched/hotplug.h>
>  
>  #include <asm/e820/api.h>
>  #include <asm/irqdomain.h>
> @@ -33,6 +34,7 @@
>  #include <asm/smp.h>
>  #include <asm/i8259.h>
>  #include <asm/setup.h>
> +#include <asm/init.h>

The above two are leftovers I believe?

[...]


> +
> +static atomic_t waiting_for_crash_ipi;
> +
> +void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
> +
> +static void acpi_mp_play_dead(void)
> +{
> +	play_dead_common();
> +	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr,
> +			      acpi_mp_pgd);
> +}
> +
> +static void acpi_mp_cpu_die(unsigned int cpu)
> +{
> +	u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
> +	unsigned long timeout;
> +
> +	/*
> +	 * Use TEST mailbox command to prove that BIOS got control over
> +	 * the CPU before declaring it dead.
> +	 *
> +	 * BIOS has to clear 'command' field of the mailbox.
> +	 */
> +	acpi_mp_wake_mailbox->apic_id = apicid;
> +	smp_store_release(&acpi_mp_wake_mailbox->command,
> +			  ACPI_MP_WAKE_COMMAND_TEST);
> +
> +	/* Don't wait longer than a second. */
> +	timeout = USEC_PER_SEC;
> +	while (READ_ONCE(acpi_mp_wake_mailbox->command) && timeout--)
> +		udelay(1);
> +}
> +
> +static void acpi_mp_stop_other_cpus(int wait)
> +{
> +	smp_shutdown_nonboot_cpus(smp_processor_id());
> +}
> +
> +static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
> +{
> +	local_irq_disable();
> +
> +	crash_save_cpu(regs, raw_smp_processor_id());
> +
> +	cpu_emergency_stop_pt();
> +
> +	disable_local_APIC();
> +
> +	/*
> +	 * Prepare the CPU for reboot _after_ invoking the callback so that the
> +	 * callback can safely use virtualization instructions, e.g. VMCLEAR.
> +	 */
> +	cpu_emergency_disable_virtualization();
> +
> +	atomic_dec(&waiting_for_crash_ipi);
> +
> +	asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr,
> +			      acpi_mp_pgd);
> +
> +	return NMI_HANDLED;
> +}
> +
> +static void acpi_mp_crash_stop_other_cpus(void)
> +{
> +	unsigned long timeout;
> +
> +	/* The kernel is broken so disable interrupts */
> +	local_irq_disable();
> +
> +
> +	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
> +
> +	/* Would it be better to replace the trap vector here? */
> +	if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
> +				 NMI_FLAG_FIRST, "crash"))
> +		return;		/* Return what? */
> +
> +	apic_send_IPI_allbutself(NMI_VECTOR);
> +
> +	/* Don't wait longer than a second. */
> +	timeout = USEC_PER_SEC;
> +	while (atomic_read(&waiting_for_crash_ipi) && timeout--)
> +		udelay(1);
> +}
> +
> 

[...]

> +	smp_ops.stop_other_cpus = acpi_mp_stop_other_cpus;
> +	smp_ops.crash_stop_other_cpus = acpi_mp_crash_stop_other_cpus;
> +
> 

The above acpi_mp_crash_stop_other_cpus() and crash_nmi_callback() etc are kinda
duplicated code with the normal crash kexec() in reboot.c.

I am not expert here but spent some time looking into the code, and it appears
the main reason preventing us from reusing that code should be TDX guest doesn't
play nicely with mwait/halt staff when putting the cpu to offline.  

I am thinking if we skip/replace them with the asm_acpi_mp_play_dead(), we
should be able to just reuse the existing smp_ops.stop_other_cpus() and
smp_ops.crash_stop_other_cpus()?

Idea only:

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b6f4e8399fca..9aee6f29a21c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -841,7 +841,10 @@ void __noreturn stop_this_cpu(void *dummy)
                 * (stack usage and variables) after possibly issuing the
                 * native_wbinvd() above.
                 */
-               native_halt();
+               if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+                       asm_acpi_mp_play_dead();
+               else
+                       native_halt();
        }
 }
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 830425e6d38e..8358b292bd42 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -868,8 +868,13 @@ static int crash_nmi_callback(unsigned int val, struct
pt_regs *regs)
        cpu_emergency_disable_virtualization();
 
        atomic_dec(&waiting_for_crash_ipi);
-       /* Assume hlt works */
-       halt();
+
+       if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+               asm_acpi_mp_play_dead();
+       else
+               /* Assume hlt works */
+               halt();
+
        for (;;)
                cpu_relax();
 
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 96a771f9f930..f86cb10602aa 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -159,7 +159,7 @@ static void native_stop_other_cpus(int wait)
                return;
 
        /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */
-       if (kexec_in_progress)
+       if (kexec_in_progress && !cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
                smp_kick_mwait_play_dead();
 
        /*


[...]

> +
>  int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
>  			      const unsigned long end)
>  {
>  	struct acpi_madt_multiproc_wakeup *mp_wake;
>  
>  	mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
> -	if (BAD_MADT_ENTRY(mp_wake, end))
> +	if (!mp_wake)
> +		return -EINVAL;

I think you can keep the BAD_MADT_ENTRY() check as a standard check, and ...

> +
> +	if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0)
> +		return -EINVAL;
> +	if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
>  		return -EINVAL;

... these can be additional sanity check.

next prev parent reply	other threads:[~2023-11-23  9:38 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-15 12:00 [PATCHv3 00/14] x86/tdx: Add kexec support Kirill A. Shutemov
2023-11-15 12:00 ` [PATCHv3 01/14] x86/acpi: Extract ACPI MADT wakeup code into a separate file Kirill A. Shutemov
2023-11-21  2:08   ` Huang, Kai
2023-11-15 12:00 ` [PATCHv3 02/14] x86/apic: Mark acpi_mp_wake_* variables as __ro_after_init Kirill A. Shutemov
2023-11-21  2:08   ` Huang, Kai
2023-11-15 12:00 ` [PATCHv3 03/14] cpu/hotplug: Add support for declaring CPU offlining not supported Kirill A. Shutemov
2023-11-21  2:15   ` Huang, Kai
2023-11-21  9:31     ` kirill.shutemov
2023-11-15 12:00 ` [PATCHv3 04/14] cpu/hotplug, x86/acpi: Disable CPU offlining for ACPI MADT wakeup Kirill A. Shutemov
2023-11-15 12:00 ` [PATCHv3 05/14] x86/kvm: Do not try to disable kvmclock if it was not enabled Kirill A. Shutemov
2023-11-15 12:00 ` [PATCHv3 06/14] x86/kexec: Keep CR4.MCE set during kexec for TDX guest Kirill A. Shutemov
2023-11-15 12:23   ` Peter Zijlstra
2023-11-15 13:03     ` Kirill A. Shutemov
2023-11-15 12:00 ` [PATCHv3 07/14] x86/mm: Make x86_platform.guest.enc_status_change_*() return errno Kirill A. Shutemov
2023-11-15 12:00 ` [PATCHv3 08/14] x86/mm: Return correct level from lookup_address() if pte is none Kirill A. Shutemov
2023-11-15 12:00 ` [PATCHv3 09/14] x86/tdx: Account shared memory Kirill A. Shutemov
2023-11-21  2:47   ` Huang, Kai
2023-11-21  9:42     ` kirill.shutemov
2023-11-21  9:49       ` kirill.shutemov
2023-11-15 12:00 ` [PATCHv3 10/14] x86/tdx: Convert shared memory back to private on kexec Kirill A. Shutemov
2023-11-21  3:46   ` Huang, Kai
2023-11-21  9:58     ` kirill.shutemov
2023-11-22 10:07       ` Huang, Kai
2023-11-23 13:47         ` kirill.shutemov
2023-11-15 12:00 ` [PATCHv3 11/14] x86/mm: Make e820_end_ram_pfn() cover E820_TYPE_ACPI ranges Kirill A. Shutemov
2023-11-15 12:00 ` [PATCHv3 12/14] x86/acpi: Rename fields in acpi_madt_multiproc_wakeup structure Kirill A. Shutemov
2023-11-15 12:00 ` [PATCHv3 13/14] x86/acpi: Do not attempt to bring up secondary CPUs in kexec case Kirill A. Shutemov
2023-11-15 20:12   ` Kuppuswamy Sathyanarayanan
2023-11-15 12:00 ` [PATCHv3 14/14] x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method Kirill A. Shutemov
2023-11-23  9:38   ` Huang, Kai [this message]
2023-12-01 15:57     ` kirill.shutemov
2023-12-04 22:43       ` Huang, Kai
2023-11-16 12:10 ` [PATCHv3 00/14] x86/tdx: Add kexec support Baoquan He
2023-11-16 12:56   ` Kirill A. Shutemov
2023-11-16 14:17     ` Baoquan He
2023-11-16 14:45       ` Baoquan He
2023-11-17 12:47         ` Kirill A. Shutemov
2023-11-17 15:03           ` Baoquan He
2023-11-17 15:46             ` Kirill A. Shutemov
2023-11-21  6:41               ` Baoquan He
2023-11-21  8:43                 ` Kirill A. Shutemov
2023-11-21  9:10                   ` Tao Liu
2023-11-21  9:24                     ` Kirill A. Shutemov
2023-11-21 10:15                   ` Baoquan He
2023-11-21 10:40                     ` Kirill A. Shutemov
2023-11-21 12:50                       ` Baoquan He
2023-11-28 15:45                 ` Isaku Yamahata

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:b6f4e8399fc dfblob:9aee6f29a21 dfblob:830425e6d38
dfblob:8358b292bd4 dfblob:96a771f9f93 dfblob:f86cb10602a )
 OR (
bs:"Re: [PATCHv3 14/14] x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7012ba92206efaa6f0a0a2e1a28355d67d55265a.camel@intel.com \
    --to=kai.huang@intel.com \
    --cc=adrian.hunter@intel.com \
    --cc=ashish.kalra@amd.com \
    --cc=bhe@redhat.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=elena.reshetova@intel.com \
    --cc=jun.nakajima@intel.com \
    --cc=kexec@lists.infradead.org \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-coco@lists.linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rafael@kernel.org \
    --cc=rick.p.edgecombe@intel.com \
    --cc=sathyanarayanan.kuppuswamy@linux.intel.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).