Re: [PATCHv2 13/13] x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method

linux-coco.lists.linux.dev archive mirror
 help / color / mirror / Atom feed

From: "Huang, Kai" <kai.huang@intel.com>
To: "kirill.shutemov@linux.intel.com"
	<kirill.shutemov@linux.intel.com>,
	"tglx@linutronix.de" <tglx@linutronix.de>,
	"mingo@redhat.com" <mingo@redhat.com>,
	"x86@kernel.org" <x86@kernel.org>, "bp@alien8.de" <bp@alien8.de>,
	"dave.hansen@linux.intel.com" <dave.hansen@linux.intel.com>
Cc: "Edgecombe, Rick P" <rick.p.edgecombe@intel.com>,
	"Reshetova, Elena" <elena.reshetova@intel.com>,
	"Nakajima, Jun" <jun.nakajima@intel.com>,
	"rafael@kernel.org" <rafael@kernel.org>,
	"peterz@infradead.org" <peterz@infradead.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"sathyanarayanan.kuppuswamy@linux.intel.com"
	<sathyanarayanan.kuppuswamy@linux.intel.com>,
	"Hunter, Adrian" <adrian.hunter@intel.com>,
	"thomas.lendacky@amd.com" <thomas.lendacky@amd.com>,
	"ashish.kalra@amd.com" <ashish.kalra@amd.com>,
	"kexec@lists.infradead.org" <kexec@lists.infradead.org>,
	"Christopherson,, Sean" <seanjc@google.com>,
	"bhe@redhat.com" <bhe@redhat.com>,
	"linux-coco@lists.linux.dev" <linux-coco@lists.linux.dev>
Subject: Re: [PATCHv2 13/13] x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method
Date: Tue, 24 Oct 2023 10:11:58 +0000	[thread overview]
Message-ID: <d86dbb81338d3473f24b3c479cf985a70bfc1118.camel@intel.com> (raw)
In-Reply-To: <20231020151242.1814-14-kirill.shutemov@linux.intel.com>


> --- /dev/null
> +++ b/arch/x86/kernel/acpi/madt.S

I think the name 'madt.S' is too generic.  How about something be more specific
such as madt_reset.S, or madt_playdead.S, etc? 

> @@ -0,0 +1,24 @@
> +#include <linux/linkage.h>
> +#include <asm/nospec-branch.h>
> +#include <asm/page_types.h>
> +#include <asm/processor-flags.h>
> +
> +	.text
> +	.align PAGE_SIZE
> +SYM_FUNC_START(asm_acpi_mp_play_dead)
> +	/* Load address of reset vector into RCX to jump when kernel is ready */
> +	movq	acpi_mp_reset_vector_paddr(%rip), %rcx
> +
> +	/* Turn off global entries. Following CR3 write will flush them. */
> +	movq	%cr4, %rdx
> +	andq	$~(X86_CR4_PGE), %rdx
> +	movq	%rdx, %cr4
> +
> +	/* Switch to identity mapping */
> +	movq	acpi_mp_pgd(%rip), %rax
> +	movq	%rax, %cr3

Do we need to switch back to kernel direct-map page table after CPU is wake up
again?  We do support normal CPU offline/online, but not limited to kexec,
right?

> +
> +	/* Jump to reset vector */
> +	ANNOTATE_RETPOLINE_SAFE
> +	jmp	*%rcx
> +SYM_FUNC_END(asm_acpi_mp_play_dead)
> diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c
> index ad170def2367..f9ff14ee2892 100644
> --- a/arch/x86/kernel/acpi/madt_wakeup.c
> +++ b/arch/x86/kernel/acpi/madt_wakeup.c
> @@ -1,8 +1,13 @@
>  #include <linux/acpi.h>
>  #include <linux/cpu.h>
> +#include <linux/delay.h>
>  #include <linux/io.h>
> +#include <linux/memblock.h>
> +#include <linux/pgtable.h>
> +#include <linux/sched/hotplug.h>
>  #include <asm/apic.h>
>  #include <asm/barrier.h>
> +#include <asm/init.h>
>  #include <asm/processor.h>
>  
>  /* Physical address of the Multiprocessor Wakeup Structure mailbox */
> @@ -11,6 +16,150 @@ static u64 acpi_mp_wake_mailbox_paddr;
>  /* Virtual address of the Multiprocessor Wakeup Structure mailbox */
>  static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox;
>  
> +u64 acpi_mp_pgd;
> +u64 acpi_mp_reset_vector_paddr;
> +
> +void asm_acpi_mp_play_dead(void);
> +
> +static void __init *alloc_pgt_page(void *context)
> +{
> +	return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
> +}
> +
> +/*
> + * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at
> + * the same place as in the kernel page tables. The function switches to
> + * the identity mapping 
> 

This function itself doesn't switch to the identity mapping.  It just creates
the kernel mapping for asm_acpi_mp_play_dead() in the identify mapping page
table.

> and has be present at the same spot in before and
> + * after transition.

This part doesn't parse to me.  I guess the whole comment can be:

	asm_acpi_mp_play_dead() is accessed both before and after switching to 
	the identity mapping.  Also map it at the kernel virtual address in
	the identity mapping table.

Or perhaps even better, put the above comments to the place where this function
is called?

> + */
> +static int __init init_transition_pgtable(pgd_t *pgd)
> +{
> +	pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
> +	unsigned long vaddr, paddr;
> +	int result = -ENOMEM;
> +	p4d_t *p4d;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +	pte_t *pte;
> +
> +	vaddr = (unsigned long)asm_acpi_mp_play_dead;
> +	pgd += pgd_index(vaddr);
> +	if (!pgd_present(*pgd)) {
> +		p4d = (p4d_t *)alloc_pgt_page(NULL);
> +		if (!p4d)
> +			goto err;
> +		set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
> +	}
> +	p4d = p4d_offset(pgd, vaddr);
> +	if (!p4d_present(*p4d)) {
> +		pud = (pud_t *)alloc_pgt_page(NULL);
> +		if (!pud)
> +			goto err;
> +		set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
> +	}
> +	pud = pud_offset(p4d, vaddr);
> +	if (!pud_present(*pud)) {
> +		pmd = (pmd_t *)alloc_pgt_page(NULL);
> +		if (!pmd)
> +			goto err;
> +		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
> +	}
> +	pmd = pmd_offset(pud, vaddr);
> +	if (!pmd_present(*pmd)) {
> +		pte = (pte_t *)alloc_pgt_page(NULL);
> +		if (!pte)
> +			goto err;
> +		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
> +	}
> +	pte = pte_offset_kernel(pmd, vaddr);
> +
> +	paddr = __pa(vaddr);
> +	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
> +
> +	return 0;
> +err:
> +	return result;
> +}
> +
> +static void acpi_mp_play_dead(void)
> +{
> +	play_dead_common();
> +	asm_acpi_mp_play_dead();
> +}
> +
> +static void acpi_mp_cpu_die(unsigned int cpu)
> +{
> +	int apicid = per_cpu(x86_cpu_to_apicid, cpu);
> +	unsigned long timeout;
> +
> +	/*
> +	 * Use TEST mailbox command to prove that BIOS got control over
> +	 * the CPU before declaring it dead.
> +	 *
> +	 * BIOS has to clear 'command' field of the mailbox.
> +	 */
> +	acpi_mp_wake_mailbox->apic_id = apicid;
> +	smp_store_release(&acpi_mp_wake_mailbox->command,
> +			  ACPI_MP_WAKE_COMMAND_TEST);
> +
> +	/* Don't wait longer than a second. */
> +	timeout = USEC_PER_SEC;
> +	while (READ_ONCE(acpi_mp_wake_mailbox->command) && timeout--)
> +		udelay(1);
> +}
> +
> +static void acpi_mp_stop_other_cpus(int wait)
> +{
> +	smp_shutdown_nonboot_cpus(smp_processor_id());
> +}
> +
> +static void acpi_mp_crash_stop_other_cpus(void)
> +{
> +	smp_shutdown_nonboot_cpus(smp_processor_id());
> +
> +	/* The kernel is broken so disable interrupts */
> +	local_irq_disable();
> +}
> +
> +static int __init acpi_mp_setup_reset(u64 reset_vector)
> +{
> +	pgd_t *pgd;
> +	struct x86_mapping_info info = {
> +		.alloc_pgt_page = alloc_pgt_page,
> +		.page_flag      = __PAGE_KERNEL_LARGE_EXEC,
> +		.kernpg_flag    = _KERNPG_TABLE_NOENC,
> +	};
> +
> +	pgd = alloc_pgt_page(NULL);
> +
> +	for (int i = 0; i < nr_pfn_mapped; i++) {
> +		unsigned long mstart, mend;
> +		mstart = pfn_mapped[i].start << PAGE_SHIFT;
> +		mend   = pfn_mapped[i].end << PAGE_SHIFT;
> +		if (kernel_ident_mapping_init(&info, pgd, mstart, mend))
> +			return -ENOMEM;
> +	}

This is for kexec() IIUC.  Add a comment?

If we consider normal CPU offline/online case, then I don't think we need the
identity mapping for all memory?

> +
> +	if (kernel_ident_mapping_init(&info, pgd,
> +				      PAGE_ALIGN_DOWN(reset_vector),
> +				      PAGE_ALIGN(reset_vector + 1))) {
> +		return -ENOMEM;
> +	}
> +
> +	if (init_transition_pgtable(pgd))
> +		return -ENOMEM;
> +
> +	smp_ops.play_dead = acpi_mp_play_dead;
> +	smp_ops.cpu_die = acpi_mp_cpu_die;
> +	smp_ops.stop_other_cpus = acpi_mp_stop_other_cpus;
> +	smp_ops.crash_stop_other_cpus = acpi_mp_crash_stop_other_cpus;
> +
> +	acpi_mp_reset_vector_paddr = reset_vector;
> +	acpi_mp_pgd = __pa(pgd);
> +
> +	return 0;
> +}
> +
>  static int acpi_wakeup_cpu(int apicid, unsigned long start_ip)
>  {
>  	if (!acpi_mp_wake_mailbox_paddr) {
> @@ -74,31 +223,43 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
>  	struct acpi_madt_multiproc_wakeup *mp_wake;
>  
>  	mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
> -	if (BAD_MADT_ENTRY(mp_wake, end))
> +	if (!mp_wake)
> +		return -EINVAL;
> +
> +	if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0)
> +		return -EINVAL;
> +	if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
>  		return -EINVAL;
>  
>  	acpi_table_print_madt_entry(&header->common);
>  
>  	acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address;
>  
> -	cpu_hotplug_disable_offlining();
> +	if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 &&
> +	    mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) {
> +		acpi_mp_setup_reset(mp_wake->reset_vector);

It's better to fallback to "disable offline" if this function fails.

next prev parent reply	other threads:[~2023-10-24 10:12 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-20 15:12 [PATCHv2 00/13] x86/tdx: Add kexec support Kirill A. Shutemov
2023-10-20 15:12 ` [PATCHv2 01/13] x86/acpi: Extract ACPI MADT wakeup code into a separate file Kirill A. Shutemov
2023-10-20 17:12   ` Kuppuswamy Sathyanarayanan
2023-10-20 15:12 ` [PATCHv2 02/13] kernel/cpu: Add support for declaring CPU offlining not supported Kirill A. Shutemov
2023-10-23  9:30   ` Huang, Kai
2023-10-23 15:31     ` kirill.shutemov
2023-10-23 22:07       ` Huang, Kai
2023-10-28 14:07         ` Thomas Gleixner
2023-10-28 14:12   ` Thomas Gleixner
2023-10-29 14:23     ` Kirill A. Shutemov
2023-10-20 15:12 ` [PATCHv2 03/13] cpu/hotplug, x86/acpi: Disable CPU offlining for ACPI MADT wakeup Kirill A. Shutemov
2023-10-20 15:12 ` [PATCHv2 04/13] x86/kvm: Do not try to disable kvmclock if it was not enabled Kirill A. Shutemov
2023-10-20 15:32   ` Sean Christopherson
2023-10-20 15:41   ` Vitaly Kuznetsov
2023-10-20 17:07     ` Sean Christopherson
2023-10-23  8:45       ` Vitaly Kuznetsov
2023-10-23 14:40         ` Sean Christopherson
2023-10-20 15:12 ` [PATCHv2 05/13] x86/kexec: Keep CR4.MCE set during kexec for TDX guest Kirill A. Shutemov
2023-10-20 15:12 ` [PATCHv2 06/13] x86/mm: Make x86_platform.guest.enc_status_change_*() return errno Kirill A. Shutemov
2023-10-20 15:12 ` [PATCHv2 07/13] x86/mm: Return correct level from lookup_address() if pte is none Kirill A. Shutemov
2023-10-20 15:12 ` [PATCHv2 08/13] x86/tdx: Account shared memory Kirill A. Shutemov
2023-10-20 15:12 ` [PATCHv2 09/13] x86/tdx: Convert shared memory back to private on kexec Kirill A. Shutemov
2023-10-20 15:12 ` [PATCHv2 10/13] x86/mm: Make e820_end_ram_pfn() cover E820_TYPE_ACPI ranges Kirill A. Shutemov
2023-10-20 15:12 ` [PATCHv2 11/13] x86/acpi: Do not attempt to bring up secondary CPUs in kexec case Kirill A. Shutemov
2023-10-24 10:14   ` Huang, Kai
2023-10-24 13:59   ` Kuppuswamy Sathyanarayanan
2023-10-27 13:01     ` Kirill A. Shutemov
2023-10-20 15:12 ` [PATCHv2 12/13] x86/acpi: Rename fields in acpi_madt_multiproc_wakeup structure Kirill A. Shutemov
2023-10-24 10:18   ` Huang, Kai
2023-10-24 12:46   ` Kuppuswamy Sathyanarayanan
2023-10-20 15:12 ` [PATCHv2 13/13] x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method Kirill A. Shutemov
2023-10-24 10:11   ` Huang, Kai [this message]
2023-10-25  3:50     ` Huang, Kai
2023-10-27 11:58     ` kirill.shutemov
2023-10-29 17:31   ` Thomas Gleixner
2023-11-01 13:26     ` Kirill A. Shutemov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d86dbb81338d3473f24b3c479cf985a70bfc1118.camel@intel.com \
    --to=kai.huang@intel.com \
    --cc=adrian.hunter@intel.com \
    --cc=ashish.kalra@amd.com \
    --cc=bhe@redhat.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=elena.reshetova@intel.com \
    --cc=jun.nakajima@intel.com \
    --cc=kexec@lists.infradead.org \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-coco@lists.linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rafael@kernel.org \
    --cc=rick.p.edgecombe@intel.com \
    --cc=sathyanarayanan.kuppuswamy@linux.intel.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).