* [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap()
@ 2025-04-24 21:57 Roman Kisel
2025-04-25 6:15 ` Wei Liu
2025-04-25 15:12 ` Michael Kelley
0 siblings, 2 replies; 13+ messages in thread
From: Roman Kisel @ 2025-04-24 21:57 UTC (permalink / raw)
To: bp, dave.hansen, decui, haiyangz, hpa, kys, mikelley, mingo, tglx,
tiala, wei.liu, linux-hyperv, linux-kernel, x86
Cc: apais, benhill, bperkins, sunilmut
To start an application processor in SNP-isolated guest, a hypercall
is used that takes a virtual processor index. The hv_snp_boot_ap()
function uses that START_VP hypercall but passes as VP ID to it what
it receives as a wakeup_secondary_cpu_64 callback: the APIC ID.
As those two aren't generally interchangeable, that may lead to hung
APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be sparse
whereas VP IDs never are.
Update the parameter names to avoid confusion as to what the parameter
is. Use the APIC ID to VP ID conversion to provide correct input to the
hypercall.
Cc: stable@vger.kernel.org
Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest")
Signed-off-by: Roman Kisel <romank@linux.microsoft.com>
---
arch/x86/hyperv/hv_init.c | 33 ++++++++++++++++++++++++++++++++
arch/x86/hyperv/hv_vtl.c | 34 +--------------------------------
arch/x86/hyperv/ivm.c | 11 +++++++++--
arch/x86/include/asm/mshyperv.h | 5 +++--
4 files changed, 46 insertions(+), 37 deletions(-)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index ddeb40930bc8..23422342a091 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -706,3 +706,36 @@ bool hv_is_hyperv_initialized(void)
return hypercall_msr.enable;
}
EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized);
+
+int hv_apicid_to_vp_id(u32 apic_id)
+{
+ u64 control;
+ u64 status;
+ unsigned long irq_flags;
+ struct hv_get_vp_from_apic_id_in *input;
+ u32 *output, ret;
+
+ local_irq_save(irq_flags);
+
+ input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+ memset(input, 0, sizeof(*input));
+ input->partition_id = HV_PARTITION_ID_SELF;
+ input->apic_ids[0] = apic_id;
+
+ output = *this_cpu_ptr(hyperv_pcpu_output_arg);
+
+ control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID;
+ status = hv_do_hypercall(control, input, output);
+ ret = output[0];
+
+ local_irq_restore(irq_flags);
+
+ if (!hv_result_success(status)) {
+ pr_err("failed to get vp id from apic id %d, status %#llx\n",
+ apic_id, status);
+ return -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(hv_apicid_to_vp_id);
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 582fe820e29c..8bc4f0121e5e 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -205,38 +205,6 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored)
return ret;
}
-static int hv_vtl_apicid_to_vp_id(u32 apic_id)
-{
- u64 control;
- u64 status;
- unsigned long irq_flags;
- struct hv_get_vp_from_apic_id_in *input;
- u32 *output, ret;
-
- local_irq_save(irq_flags);
-
- input = *this_cpu_ptr(hyperv_pcpu_input_arg);
- memset(input, 0, sizeof(*input));
- input->partition_id = HV_PARTITION_ID_SELF;
- input->apic_ids[0] = apic_id;
-
- output = *this_cpu_ptr(hyperv_pcpu_output_arg);
-
- control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID;
- status = hv_do_hypercall(control, input, output);
- ret = output[0];
-
- local_irq_restore(irq_flags);
-
- if (!hv_result_success(status)) {
- pr_err("failed to get vp id from apic id %d, status %#llx\n",
- apic_id, status);
- return -EINVAL;
- }
-
- return ret;
-}
-
static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip)
{
int vp_id, cpu;
@@ -250,7 +218,7 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip)
return -EINVAL;
pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid);
- vp_id = hv_vtl_apicid_to_vp_id(apicid);
+ vp_id = hv_apicid_to_vp_id(apicid);
if (vp_id < 0) {
pr_err("Couldn't find CPU with APIC ID %d\n", apicid);
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index c0039a90e9e0..e3c32bb0d0cf 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -288,7 +288,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa)
free_page((unsigned long)vmsa);
}
-int hv_snp_boot_ap(u32 cpu, unsigned long start_ip)
+int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip)
{
struct sev_es_save_area *vmsa = (struct sev_es_save_area *)
__get_free_page(GFP_KERNEL | __GFP_ZERO);
@@ -297,10 +297,17 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip)
u64 ret, retry = 5;
struct hv_enable_vp_vtl *start_vp_input;
unsigned long flags;
+ int vp_id;
if (!vmsa)
return -ENOMEM;
+ vp_id = hv_apicid_to_vp_id(apic_id);
+
+ /* The BSP or an error */
+ if (vp_id <= 0)
+ return -EINVAL;
+
native_store_gdt(&gdtr);
vmsa->gdtr.base = gdtr.address;
@@ -348,7 +355,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip)
start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg;
memset(start_vp_input, 0, sizeof(*start_vp_input));
start_vp_input->partition_id = -1;
- start_vp_input->vp_index = cpu;
+ start_vp_input->vp_index = vp_id;
start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl;
*(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 07aadf0e839f..ae62a34bfd1e 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -268,11 +268,11 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
#ifdef CONFIG_AMD_MEM_ENCRYPT
bool hv_ghcb_negotiate_protocol(void);
void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason);
-int hv_snp_boot_ap(u32 cpu, unsigned long start_ip);
+int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip);
#else
static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
-static inline int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) { return 0; }
+static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) { return 0; }
#endif
#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST)
@@ -329,6 +329,7 @@ static inline void hv_set_non_nested_msr(unsigned int reg, u64 value) { }
static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; }
#endif /* CONFIG_HYPERV */
+int hv_apicid_to_vp_id(u32 apic_id);
#ifdef CONFIG_HYPERV_VTL_MODE
void __init hv_vtl_init_platform(void);
base-commit: 628cc040b3a2980df6032766e8ef0688e981ab95
--
2.43.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-24 21:57 [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() Roman Kisel @ 2025-04-25 6:15 ` Wei Liu 2025-04-25 9:14 ` [EXTERNAL] " Saurabh Singh Sengar 2025-04-25 15:12 ` Michael Kelley 1 sibling, 1 reply; 13+ messages in thread From: Wei Liu @ 2025-04-25 6:15 UTC (permalink / raw) To: Roman Kisel Cc: bp, dave.hansen, decui, haiyangz, hpa, kys, mikelley, mingo, tglx, tiala, wei.liu, linux-hyperv, linux-kernel, x86, apais, benhill, bperkins, sunilmut On Thu, Apr 24, 2025 at 02:57:46PM -0700, Roman Kisel wrote: > To start an application processor in SNP-isolated guest, a hypercall > is used that takes a virtual processor index. The hv_snp_boot_ap() > function uses that START_VP hypercall but passes as VP ID to it what > it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. > > As those two aren't generally interchangeable, that may lead to hung > APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be sparse > whereas VP IDs never are. > > Update the parameter names to avoid confusion as to what the parameter > is. Use the APIC ID to VP ID conversion to provide correct input to the > hypercall. > > Cc: stable@vger.kernel.org > Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest") > Signed-off-by: Roman Kisel <romank@linux.microsoft.com> Applied to hyperv-fixes. ^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [EXTERNAL] Re: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-25 6:15 ` Wei Liu @ 2025-04-25 9:14 ` Saurabh Singh Sengar 2025-04-25 16:43 ` Roman Kisel 0 siblings, 1 reply; 13+ messages in thread From: Saurabh Singh Sengar @ 2025-04-25 9:14 UTC (permalink / raw) To: Wei Liu, Roman Kisel Cc: bp@alien8.de, dave.hansen@linux.intel.com, Dexuan Cui, Haiyang Zhang, hpa@zytor.com, KY Srinivasan, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, Tianyu Lan, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org, Allen Pais, Ben Hillis, Brian Perkins, Sunil Muthuswamy > > On Thu, Apr 24, 2025 at 02:57:46PM -0700, Roman Kisel wrote: > > To start an application processor in SNP-isolated guest, a hypercall > > is used that takes a virtual processor index. The hv_snp_boot_ap() > > function uses that START_VP hypercall but passes as VP ID to it what > > it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. > > > > As those two aren't generally interchangeable, that may lead to hung > > APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be sparse > > whereas VP IDs never are. > > > > Update the parameter names to avoid confusion as to what the parameter > > is. Use the APIC ID to VP ID conversion to provide correct input to > > the hypercall. > > > > Cc: stable@vger.kernel.org > > Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest") > > Signed-off-by: Roman Kisel <romank@linux.microsoft.com> > > Applied to hyperv-fixes. This patch will break the builds. Roman, Have you tested this patch on the latest linux-next ? Regards, Saurabh ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [EXTERNAL] Re: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-25 9:14 ` [EXTERNAL] " Saurabh Singh Sengar @ 2025-04-25 16:43 ` Roman Kisel 2025-04-25 17:18 ` Saurabh Singh Sengar 0 siblings, 1 reply; 13+ messages in thread From: Roman Kisel @ 2025-04-25 16:43 UTC (permalink / raw) To: Saurabh Singh Sengar, Wei Liu Cc: bp@alien8.de, dave.hansen@linux.intel.com, Dexuan Cui, Haiyang Zhang, hpa@zytor.com, KY Srinivasan, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, Tianyu Lan, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org, Allen Pais, Ben Hillis, Brian Perkins, Sunil Muthuswamy On 4/25/2025 2:14 AM, Saurabh Singh Sengar wrote: >> >> On Thu, Apr 24, 2025 at 02:57:46PM -0700, Roman Kisel wrote: >>> To start an application processor in SNP-isolated guest, a hypercall >>> is used that takes a virtual processor index. The hv_snp_boot_ap() >>> function uses that START_VP hypercall but passes as VP ID to it what >>> it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. >>> >>> As those two aren't generally interchangeable, that may lead to hung >>> APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be sparse >>> whereas VP IDs never are. >>> >>> Update the parameter names to avoid confusion as to what the parameter >>> is. Use the APIC ID to VP ID conversion to provide correct input to >>> the hypercall. >>> >>> Cc: stable@vger.kernel.org >>> Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest") >>> Signed-off-by: Roman Kisel <romank@linux.microsoft.com> >> >> Applied to hyperv-fixes. > > This patch will break the builds. > > Roman, > Have you tested this patch on the latest linux-next ? Thanks for your help! Only on hyperv-next, looking how to repro and fix on linux-next. The kernel robot was happy, or I am missing some context about how the robot works... What was your kernel configuration, or just anything that enables Hyper-V? I thought the the linux-next tree would be a subset of hyper-next so should work, realizing that have to check, likely there might be changes from other trees. > > Regards, > Saurabh > -- Thank you, Roman ^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [EXTERNAL] Re: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-25 16:43 ` Roman Kisel @ 2025-04-25 17:18 ` Saurabh Singh Sengar 2025-04-25 18:22 ` Roman Kisel 0 siblings, 1 reply; 13+ messages in thread From: Saurabh Singh Sengar @ 2025-04-25 17:18 UTC (permalink / raw) To: Roman Kisel, Wei Liu Cc: bp@alien8.de, dave.hansen@linux.intel.com, Dexuan Cui, Haiyang Zhang, hpa@zytor.com, KY Srinivasan, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, Tianyu Lan, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org, Allen Pais, Ben Hillis, Brian Perkins, Sunil Muthuswamy > On 4/25/2025 2:14 AM, Saurabh Singh Sengar wrote: > >> > >> On Thu, Apr 24, 2025 at 02:57:46PM -0700, Roman Kisel wrote: > >>> To start an application processor in SNP-isolated guest, a hypercall > >>> is used that takes a virtual processor index. The hv_snp_boot_ap() > >>> function uses that START_VP hypercall but passes as VP ID to it what > >>> it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. > >>> > >>> As those two aren't generally interchangeable, that may lead to hung > >>> APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be > >>> sparse whereas VP IDs never are. > >>> > >>> Update the parameter names to avoid confusion as to what the > >>> parameter is. Use the APIC ID to VP ID conversion to provide correct > >>> input to the hypercall. > >>> > >>> Cc: stable@vger.kernel.org > >>> Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP > >>> guest") > >>> Signed-off-by: Roman Kisel <romank@linux.microsoft.com> > >> > >> Applied to hyperv-fixes. > > > > This patch will break the builds. > > > > Roman, > > Have you tested this patch on the latest linux-next ? > > Thanks for your help! Only on hyperv-next, looking how to repro and fix on > linux-next. The kernel robot was happy, or I am missing some context about > how the robot works... > > What was your kernel configuration, or just anything that enables Hyper-V? > > I thought the the linux-next tree would be a subset of hyper-next so should > work, realizing that have to check, likely there might be changes from other > trees. > hyperv-fixes is broken too, here's the log for your ref: https://dashboard.kernelci.org/log-viewer?itemId=microsoft%3A20250425085833916790&o=microsoft&type=build&url=https%3A%2F%2Flisalogsb15850d3.blob.core.windows.net%2Flisa-logs%2Fdefault_default%2F20250425%2F20250425-085110-393%2Fkernel_installer%2Fbuild.log%3Fst%3D2025-04-25T09%253A09%253A35Z%26se%3D2025-05-02T09%253A09%253A35Z%26sp%3Dr%26sv%3D2024-11-04%26sr%3Db%26skoid%3D14b53b1d-f4fc-442e-a437-4989376b1754%26sktid%3D72f988bf-86f1-41af-91ab-2d7cd011db47%26skt%3D2025-04-25T09%253A09%253A35Z%26ske%3D2025-05-02T09%253A09%253A35Z%26sks%3Db%26skv%3D2024-11-04%26sig%3DZHfA7%2FC174KR6HT8zhchCb47NE1aceqw8h0APzKxsII%253D The hv_snp_boot_ap() function in arch/x86/hyperv/ivm.c currently fails to compile. It looks like the function's argument was changed from 'cpu' to 'apic_id', but internal references to cpu were not updated accordingly. This might have gone unnoticed during your testing if CONFIG_AMD_MEM_ENCRYPT was disabled, in which case this function wouldn't have been compiled. Regards, Saurabh ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [EXTERNAL] Re: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-25 17:18 ` Saurabh Singh Sengar @ 2025-04-25 18:22 ` Roman Kisel 2025-04-25 21:12 ` Wei Liu 0 siblings, 1 reply; 13+ messages in thread From: Roman Kisel @ 2025-04-25 18:22 UTC (permalink / raw) To: Saurabh Singh Sengar, Wei Liu Cc: bp@alien8.de, dave.hansen@linux.intel.com, Dexuan Cui, Haiyang Zhang, hpa@zytor.com, KY Srinivasan, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, Tianyu Lan, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org, Allen Pais, Ben Hillis, Brian Perkins, Sunil Muthuswamy On 4/25/2025 10:18 AM, Saurabh Singh Sengar wrote: > >> On 4/25/2025 2:14 AM, Saurabh Singh Sengar wrote: >>>> >>>> On Thu, Apr 24, 2025 at 02:57:46PM -0700, Roman Kisel wrote: >>>>> To start an application processor in SNP-isolated guest, a hypercall >>>>> is used that takes a virtual processor index. The hv_snp_boot_ap() >>>>> function uses that START_VP hypercall but passes as VP ID to it what >>>>> it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. >>>>> >>>>> As those two aren't generally interchangeable, that may lead to hung >>>>> APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be >>>>> sparse whereas VP IDs never are. >>>>> >>>>> Update the parameter names to avoid confusion as to what the >>>>> parameter is. Use the APIC ID to VP ID conversion to provide correct >>>>> input to the hypercall. >>>>> >>>>> Cc: stable@vger.kernel.org >>>>> Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP >>>>> guest") >>>>> Signed-off-by: Roman Kisel <romank@linux.microsoft.com> >>>> >>>> Applied to hyperv-fixes. >>> >>> This patch will break the builds. >>> >>> Roman, >>> Have you tested this patch on the latest linux-next ? >> >> Thanks for your help! Only on hyperv-next, looking how to repro and fix on >> linux-next. The kernel robot was happy, or I am missing some context about >> how the robot works... >> >> What was your kernel configuration, or just anything that enables Hyper-V? >> >> I thought the the linux-next tree would be a subset of hyper-next so should >> work, realizing that have to check, likely there might be changes from other >> trees. >> > > > hyperv-fixes is broken too, here's the log for your ref: > > https://dashboard.kernelci.org/log-viewer?itemId=microsoft%3A20250425085833916790&o=microsoft&type=build&url=https%3A%2F%2Flisalogsb15850d3.blob.core.windows.net%2Flisa-logs%2Fdefault_default%2F20250425%2F20250425-085110-393%2Fkernel_installer%2Fbuild.log%3Fst%3D2025-04-25T09%253A09%253A35Z%26se%3D2025-05-02T09%253A09%253A35Z%26sp%3Dr%26sv%3D2024-11-04%26sr%3Db%26skoid%3D14b53b1d-f4fc-442e-a437-4989376b1754%26sktid%3D72f988bf-86f1-41af-91ab-2d7cd011db47%26skt%3D2025-04-25T09%253A09%253A35Z%26ske%3D2025-05-02T09%253A09%253A35Z%26sks%3Db%26skv%3D2024-11-04%26sig%3DZHfA7%2FC174KR6HT8zhchCb47NE1aceqw8h0APzKxsII%253D > > The hv_snp_boot_ap() function in arch/x86/hyperv/ivm.c currently fails to compile. > It looks like the function's argument was changed from 'cpu' to 'apic_id', but internal > references to cpu were not updated accordingly. > > This might have gone unnoticed during your testing if CONFIG_AMD_MEM_ENCRYPT > was disabled, in which case this function wouldn't have been compiled. Must be the case! I did run the command to merge the CVM specific config options but I didn't check the result. Yep, I see the issue. Will resend the patch. > > Regards, > Saurabh -- Thank you, Roman ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [EXTERNAL] Re: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-25 18:22 ` Roman Kisel @ 2025-04-25 21:12 ` Wei Liu 2025-04-28 17:26 ` Roman Kisel 0 siblings, 1 reply; 13+ messages in thread From: Wei Liu @ 2025-04-25 21:12 UTC (permalink / raw) To: Roman Kisel Cc: Saurabh Singh Sengar, Wei Liu, bp@alien8.de, dave.hansen@linux.intel.com, Dexuan Cui, Haiyang Zhang, hpa@zytor.com, KY Srinivasan, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, Tianyu Lan, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org, Allen Pais, Ben Hillis, Brian Perkins, Sunil Muthuswamy On Fri, Apr 25, 2025 at 11:22:08AM -0700, Roman Kisel wrote: > > > On 4/25/2025 10:18 AM, Saurabh Singh Sengar wrote: > > > On 4/25/2025 2:14 AM, Saurabh Singh Sengar wrote: > > > > > > > > > > On Thu, Apr 24, 2025 at 02:57:46PM -0700, Roman Kisel wrote: > > > > > > To start an application processor in SNP-isolated guest, a hypercall > > > > > > is used that takes a virtual processor index. The hv_snp_boot_ap() > > > > > > function uses that START_VP hypercall but passes as VP ID to it what > > > > > > it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. > > > > > > > > > > > > As those two aren't generally interchangeable, that may lead to hung > > > > > > APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be > > > > > > sparse whereas VP IDs never are. > > > > > > > > > > > > Update the parameter names to avoid confusion as to what the > > > > > > parameter is. Use the APIC ID to VP ID conversion to provide correct > > > > > > input to the hypercall. > > > > > > > > > > > > Cc: stable@vger.kernel.org > > > > > > Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP > > > > > > guest") > > > > > > Signed-off-by: Roman Kisel <romank@linux.microsoft.com> > > > > > > > > > > Applied to hyperv-fixes. > > > > > > > > This patch will break the builds. > > > > > > > > Roman, > > > > Have you tested this patch on the latest linux-next ? > > > > > > Thanks for your help! Only on hyperv-next, looking how to repro and fix on > > > linux-next. The kernel robot was happy, or I am missing some context about > > > how the robot works... > > > > > > What was your kernel configuration, or just anything that enables Hyper-V? > > > > > > I thought the the linux-next tree would be a subset of hyper-next so should > > > work, realizing that have to check, likely there might be changes from other > > > trees. > > > > > > > > > hyperv-fixes is broken too, here's the log for your ref: > > > > https://dashboard.kernelci.org/log-viewer?itemId=microsoft%3A20250425085833916790&o=microsoft&type=build&url=https%3A%2F%2Flisalogsb15850d3.blob.core.windows.net%2Flisa-logs%2Fdefault_default%2F20250425%2F20250425-085110-393%2Fkernel_installer%2Fbuild.log%3Fst%3D2025-04-25T09%253A09%253A35Z%26se%3D2025-05-02T09%253A09%253A35Z%26sp%3Dr%26sv%3D2024-11-04%26sr%3Db%26skoid%3D14b53b1d-f4fc-442e-a437-4989376b1754%26sktid%3D72f988bf-86f1-41af-91ab-2d7cd011db47%26skt%3D2025-04-25T09%253A09%253A35Z%26ske%3D2025-05-02T09%253A09%253A35Z%26sks%3Db%26skv%3D2024-11-04%26sig%3DZHfA7%2FC174KR6HT8zhchCb47NE1aceqw8h0APzKxsII%253D > > > > The hv_snp_boot_ap() function in arch/x86/hyperv/ivm.c currently fails to compile. > > It looks like the function's argument was changed from 'cpu' to 'apic_id', but internal > > references to cpu were not updated accordingly. > > > > This might have gone unnoticed during your testing if CONFIG_AMD_MEM_ENCRYPT > > was disabled, in which case this function wouldn't have been compiled. > > Must be the case! I did run the command to merge the CVM specific config > options but I didn't check the result. > > Yep, I see the issue. Will resend the patch. I have removed this from hyperv-fixes. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [EXTERNAL] Re: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-25 21:12 ` Wei Liu @ 2025-04-28 17:26 ` Roman Kisel 0 siblings, 0 replies; 13+ messages in thread From: Roman Kisel @ 2025-04-28 17:26 UTC (permalink / raw) To: Wei Liu Cc: Saurabh Singh Sengar, bp@alien8.de, dave.hansen@linux.intel.com, Dexuan Cui, Haiyang Zhang, hpa@zytor.com, KY Srinivasan, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, Tianyu Lan, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org, Allen Pais, Ben Hillis, Brian Perkins, Sunil Muthuswamy On 4/25/2025 2:12 PM, Wei Liu wrote: > On Fri, Apr 25, 2025 at 11:22:08AM -0700, Roman Kisel wrote: [...] >> >> Yep, I see the issue. Will resend the patch. > > I have removed this from hyperv-fixes. Thank you, Wei! I'm working on resolving feedback for v2. -- Thank you, Roman ^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-24 21:57 [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() Roman Kisel 2025-04-25 6:15 ` Wei Liu @ 2025-04-25 15:12 ` Michael Kelley 2025-04-25 16:35 ` Roman Kisel 1 sibling, 1 reply; 13+ messages in thread From: Michael Kelley @ 2025-04-25 15:12 UTC (permalink / raw) To: Roman Kisel, bp@alien8.de, dave.hansen@linux.intel.com, decui@microsoft.com, haiyangz@microsoft.com, hpa@zytor.com, kys@microsoft.com, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, tiala@microsoft.com, wei.liu@kernel.org, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org Cc: apais@microsoft.com, benhill@microsoft.com, bperkins@microsoft.com, sunilmut@microsoft.com From: Roman Kisel <romank@linux.microsoft.com> Sent: Thursday, April 24, 2025 2:58 PM > > To start an application processor in SNP-isolated guest, a hypercall > is used that takes a virtual processor index. The hv_snp_boot_ap() > function uses that START_VP hypercall but passes as VP ID to it what > it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. > > As those two aren't generally interchangeable, that may lead to hung > APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be sparse > whereas VP IDs never are. I agree that VP IDs (a.k.a. VP indexes) and APIC IDs don't necessary match, and that APIC IDs might be sparse. But I'm not aware of any statement in the TLFS about the nature of VP indexes, except that "A virtual processor index must be less than the maximum number of virtual processors per partition." But that maximum is the Hyper-V implementation maximum, not the maximum for a particular VM. So the statement does not imply denseness unless the number of CPUs in the VM is equal to the Hyper-V implementation max. In other parts of Linux kernel code, we assume that VP indexes might be sparse as well. All that said, this is just a comment about the precise accuracy of your commit message, and doesn't affect the code. > > Update the parameter names to avoid confusion as to what the parameter > is. Use the APIC ID to VP ID conversion to provide correct input to the > hypercall. Terminology: The TLFS calls this the "VP Index", not the "VP ID". In other Linux code, we also call it the "VP Index". See the hv_vp_index array, for example. The exception is the hypercall itself, which the TLFS calls HvCallGetVpIndexFromApicId, but which our Linux code calls HVCALL_GET_VP_ID_FROM_APIC_ID for some unknown reason. Could you fix the terminology to be consistent? And maybe fix the HVCALL_* string name as well. I know you are just moving the existing VTL code, but let's take the opportunity to avoid any terminology inconsistency. > > Cc: stable@vger.kernel.org > Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest") > Signed-off-by: Roman Kisel <romank@linux.microsoft.com> > --- > arch/x86/hyperv/hv_init.c | 33 ++++++++++++++++++++++++++++++++ > arch/x86/hyperv/hv_vtl.c | 34 +-------------------------------- > arch/x86/hyperv/ivm.c | 11 +++++++++-- > arch/x86/include/asm/mshyperv.h | 5 +++-- > 4 files changed, 46 insertions(+), 37 deletions(-) > > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c > index ddeb40930bc8..23422342a091 100644 > --- a/arch/x86/hyperv/hv_init.c > +++ b/arch/x86/hyperv/hv_init.c > @@ -706,3 +706,36 @@ bool hv_is_hyperv_initialized(void) > return hypercall_msr.enable; > } > EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); > + > +int hv_apicid_to_vp_id(u32 apic_id) > +{ > + u64 control; > + u64 status; > + unsigned long irq_flags; > + struct hv_get_vp_from_apic_id_in *input; > + u32 *output, ret; > + > + local_irq_save(irq_flags); > + > + input = *this_cpu_ptr(hyperv_pcpu_input_arg); > + memset(input, 0, sizeof(*input)); > + input->partition_id = HV_PARTITION_ID_SELF; > + input->apic_ids[0] = apic_id; > + > + output = *this_cpu_ptr(hyperv_pcpu_output_arg); > + > + control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; > + status = hv_do_hypercall(control, input, output); > + ret = output[0]; > + > + local_irq_restore(irq_flags); > + > + if (!hv_result_success(status)) { > + pr_err("failed to get vp id from apic id %d, status %#llx\n", > + apic_id, status); > + return -EINVAL; > + } > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(hv_apicid_to_vp_id); > diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c > index 582fe820e29c..8bc4f0121e5e 100644 > --- a/arch/x86/hyperv/hv_vtl.c > +++ b/arch/x86/hyperv/hv_vtl.c > @@ -205,38 +205,6 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored) > return ret; > } > > -static int hv_vtl_apicid_to_vp_id(u32 apic_id) > -{ > - u64 control; > - u64 status; > - unsigned long irq_flags; > - struct hv_get_vp_from_apic_id_in *input; > - u32 *output, ret; > - > - local_irq_save(irq_flags); > - > - input = *this_cpu_ptr(hyperv_pcpu_input_arg); > - memset(input, 0, sizeof(*input)); > - input->partition_id = HV_PARTITION_ID_SELF; > - input->apic_ids[0] = apic_id; > - > - output = *this_cpu_ptr(hyperv_pcpu_output_arg); > - > - control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; > - status = hv_do_hypercall(control, input, output); > - ret = output[0]; > - > - local_irq_restore(irq_flags); > - > - if (!hv_result_success(status)) { > - pr_err("failed to get vp id from apic id %d, status %#llx\n", > - apic_id, status); > - return -EINVAL; > - } > - > - return ret; > -} > - > static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) > { > int vp_id, cpu; > @@ -250,7 +218,7 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned > long start_eip) > return -EINVAL; > > pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid); > - vp_id = hv_vtl_apicid_to_vp_id(apicid); > + vp_id = hv_apicid_to_vp_id(apicid); > > if (vp_id < 0) { > pr_err("Couldn't find CPU with APIC ID %d\n", apicid); > diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c > index c0039a90e9e0..e3c32bb0d0cf 100644 > --- a/arch/x86/hyperv/ivm.c > +++ b/arch/x86/hyperv/ivm.c > @@ -288,7 +288,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) > free_page((unsigned long)vmsa); > } > > -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) > +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) > { > struct sev_es_save_area *vmsa = (struct sev_es_save_area *) > __get_free_page(GFP_KERNEL | __GFP_ZERO); > @@ -297,10 +297,17 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) > u64 ret, retry = 5; > struct hv_enable_vp_vtl *start_vp_input; > unsigned long flags; > + int vp_id; > > if (!vmsa) > return -ENOMEM; > > + vp_id = hv_apicid_to_vp_id(apic_id); > + > + /* The BSP or an error */ > + if (vp_id <= 0) Returning an error on value 0 may be problematic here. Consider the panic case where a CPU other than the BSP takes a panic and initiates kdump. If the kdump kernel runs with more than 1 CPU, it may try to start the CPU that was originally the BSP. To my knowledge, SEV-SNP guests on Hyper-V don't support kdump at the moment so this problem is currently theoretical, but let's not leave a potential future problem by excluding 0 here. Also, since I assert that we really don't know anything about the VP index values, we can't exclude 0. It may or may not be the original BSP. Michael > + return -EINVAL; > + > native_store_gdt(&gdtr); > > vmsa->gdtr.base = gdtr.address; > @@ -348,7 +355,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) > start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; > memset(start_vp_input, 0, sizeof(*start_vp_input)); > start_vp_input->partition_id = -1; > - start_vp_input->vp_index = cpu; > + start_vp_input->vp_index = vp_id; > start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; > *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; > > diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h > index 07aadf0e839f..ae62a34bfd1e 100644 > --- a/arch/x86/include/asm/mshyperv.h > +++ b/arch/x86/include/asm/mshyperv.h > @@ -268,11 +268,11 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct > hv_interrupt_entry *entry); > #ifdef CONFIG_AMD_MEM_ENCRYPT > bool hv_ghcb_negotiate_protocol(void); > void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); > -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip); > +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip); > #else > static inline bool hv_ghcb_negotiate_protocol(void) { return false; } > static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} > -static inline int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) { return 0; } > +static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) { return 0; } > #endif > > #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) > @@ -329,6 +329,7 @@ static inline void hv_set_non_nested_msr(unsigned int reg, > u64 value) { } > static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; } > #endif /* CONFIG_HYPERV */ > > +int hv_apicid_to_vp_id(u32 apic_id); > > #ifdef CONFIG_HYPERV_VTL_MODE > void __init hv_vtl_init_platform(void); > > base-commit: 628cc040b3a2980df6032766e8ef0688e981ab95 > -- > 2.43.0 > ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-25 15:12 ` Michael Kelley @ 2025-04-25 16:35 ` Roman Kisel 2025-04-25 16:55 ` Michael Kelley 0 siblings, 1 reply; 13+ messages in thread From: Roman Kisel @ 2025-04-25 16:35 UTC (permalink / raw) To: Michael Kelley, bp@alien8.de, dave.hansen@linux.intel.com, decui@microsoft.com, haiyangz@microsoft.com, hpa@zytor.com, kys@microsoft.com, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, tiala@microsoft.com, wei.liu@kernel.org, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org Cc: apais@microsoft.com, benhill@microsoft.com, bperkins@microsoft.com, sunilmut@microsoft.com On 4/25/2025 8:12 AM, Michael Kelley wrote: > From: Roman Kisel <romank@linux.microsoft.com> Sent: Thursday, April 24, 2025 2:58 PM >> >> To start an application processor in SNP-isolated guest, a hypercall >> is used that takes a virtual processor index. The hv_snp_boot_ap() >> function uses that START_VP hypercall but passes as VP ID to it what >> it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. >> >> As those two aren't generally interchangeable, that may lead to hung >> APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be sparse >> whereas VP IDs never are. > > I agree that VP IDs (a.k.a. VP indexes) and APIC IDs don't necessary match, > and that APIC IDs might be sparse. But I'm not aware of any statement > in the TLFS about the nature of VP indexes, except that > > "A virtual processor index must be less than the maximum number of > virtual processors per partition." > > But that maximum is the Hyper-V implementation maximum, not the > maximum for a particular VM. So the statement does not imply > denseness unless the number of CPUs in the VM is equal to the > Hyper-V implementation max. In other parts of Linux kernel code, > we assume that VP indexes might be sparse as well. > > All that said, this is just a comment about the precise accuracy of > your commit message, and doesn't affect the code. > I appreciate your help with the precision. I used loose language, agreed, would like to fix that. The patch was applied though but not yet sent to the Linus'es tree as I understand. I'd appreciate guidance on the process! Should I send a v2 nevertheless and explain the situation in the cover letter? IOW, how do I make this easier for the maintainer(s)? >> >> Update the parameter names to avoid confusion as to what the parameter >> is. Use the APIC ID to VP ID conversion to provide correct input to the >> hypercall. > > Terminology: The TLFS calls this the "VP Index", not the "VP ID". In > other Linux code, we also call it the "VP Index". See the hv_vp_index > array, for example. The exception is the hypercall itself, which the TLFS > calls HvCallGetVpIndexFromApicId, but which our Linux code calls > HVCALL_GET_VP_ID_FROM_APIC_ID for some unknown reason. > > Could you fix the terminology to be consistent? And maybe fix the > HVCALL_* string name as well. I know you are just moving the > existing VTL code, but let's take the opportunity to avoid any > terminology inconsistency. > I percieved ID as both "index" and "identificator" I guess but maybe "idx" is more like "index". I'll send out a fix for the terminology, thanks for your help! >> >> Cc: stable@vger.kernel.org >> Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest") >> Signed-off-by: Roman Kisel <romank@linux.microsoft.com> >> --- >> arch/x86/hyperv/hv_init.c | 33 ++++++++++++++++++++++++++++++++ >> arch/x86/hyperv/hv_vtl.c | 34 +-------------------------------- >> arch/x86/hyperv/ivm.c | 11 +++++++++-- >> arch/x86/include/asm/mshyperv.h | 5 +++-- >> 4 files changed, 46 insertions(+), 37 deletions(-) >> >> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c >> index ddeb40930bc8..23422342a091 100644 >> --- a/arch/x86/hyperv/hv_init.c >> +++ b/arch/x86/hyperv/hv_init.c >> @@ -706,3 +706,36 @@ bool hv_is_hyperv_initialized(void) >> return hypercall_msr.enable; >> } >> EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); >> + >> +int hv_apicid_to_vp_id(u32 apic_id) >> +{ >> + u64 control; >> + u64 status; >> + unsigned long irq_flags; >> + struct hv_get_vp_from_apic_id_in *input; >> + u32 *output, ret; >> + >> + local_irq_save(irq_flags); >> + >> + input = *this_cpu_ptr(hyperv_pcpu_input_arg); >> + memset(input, 0, sizeof(*input)); >> + input->partition_id = HV_PARTITION_ID_SELF; >> + input->apic_ids[0] = apic_id; >> + >> + output = *this_cpu_ptr(hyperv_pcpu_output_arg); >> + >> + control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; >> + status = hv_do_hypercall(control, input, output); >> + ret = output[0]; >> + >> + local_irq_restore(irq_flags); >> + >> + if (!hv_result_success(status)) { >> + pr_err("failed to get vp id from apic id %d, status %#llx\n", >> + apic_id, status); >> + return -EINVAL; >> + } >> + >> + return ret; >> +} >> +EXPORT_SYMBOL_GPL(hv_apicid_to_vp_id); >> diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c >> index 582fe820e29c..8bc4f0121e5e 100644 >> --- a/arch/x86/hyperv/hv_vtl.c >> +++ b/arch/x86/hyperv/hv_vtl.c >> @@ -205,38 +205,6 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int cpu, u64 eip_ignored) >> return ret; >> } >> >> -static int hv_vtl_apicid_to_vp_id(u32 apic_id) >> -{ >> - u64 control; >> - u64 status; >> - unsigned long irq_flags; >> - struct hv_get_vp_from_apic_id_in *input; >> - u32 *output, ret; >> - >> - local_irq_save(irq_flags); >> - >> - input = *this_cpu_ptr(hyperv_pcpu_input_arg); >> - memset(input, 0, sizeof(*input)); >> - input->partition_id = HV_PARTITION_ID_SELF; >> - input->apic_ids[0] = apic_id; >> - >> - output = *this_cpu_ptr(hyperv_pcpu_output_arg); >> - >> - control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; >> - status = hv_do_hypercall(control, input, output); >> - ret = output[0]; >> - >> - local_irq_restore(irq_flags); >> - >> - if (!hv_result_success(status)) { >> - pr_err("failed to get vp id from apic id %d, status %#llx\n", >> - apic_id, status); >> - return -EINVAL; >> - } >> - >> - return ret; >> -} >> - >> static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) >> { >> int vp_id, cpu; >> @@ -250,7 +218,7 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned >> long start_eip) >> return -EINVAL; >> >> pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid); >> - vp_id = hv_vtl_apicid_to_vp_id(apicid); >> + vp_id = hv_apicid_to_vp_id(apicid); >> >> if (vp_id < 0) { >> pr_err("Couldn't find CPU with APIC ID %d\n", apicid); >> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c >> index c0039a90e9e0..e3c32bb0d0cf 100644 >> --- a/arch/x86/hyperv/ivm.c >> +++ b/arch/x86/hyperv/ivm.c >> @@ -288,7 +288,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) >> free_page((unsigned long)vmsa); >> } >> >> -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) >> +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) >> { >> struct sev_es_save_area *vmsa = (struct sev_es_save_area *) >> __get_free_page(GFP_KERNEL | __GFP_ZERO); >> @@ -297,10 +297,17 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) >> u64 ret, retry = 5; >> struct hv_enable_vp_vtl *start_vp_input; >> unsigned long flags; >> + int vp_id; >> >> if (!vmsa) >> return -ENOMEM; >> >> + vp_id = hv_apicid_to_vp_id(apic_id); >> + >> + /* The BSP or an error */ >> + if (vp_id <= 0) > > Returning an error on value 0 may be problematic here. Consider > the panic case where a CPU other than the BSP takes a panic and > initiates kdump. If the kdump kernel runs with more than 1 CPU, it > may try to start the CPU that was originally the BSP. To my > knowledge, SEV-SNP guests on Hyper-V don't support kdump at > the moment so this problem is currently theoretical, but let's not > leave a potential future problem by excluding 0 here. > > Also, since I assert that we really don't know anything about the > VP index values, we can't exclude 0. It may or may not be the > original BSP. > I believed that the BSP is always 0 yet as long as that's not in TLFS, that's not true, I agree on that. Probably not this function's job to check that the processor shouldn't be attempted to start, will fix! > Michael > >> + return -EINVAL; >> + >> native_store_gdt(&gdtr); >> >> vmsa->gdtr.base = gdtr.address; >> @@ -348,7 +355,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) >> start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; >> memset(start_vp_input, 0, sizeof(*start_vp_input)); >> start_vp_input->partition_id = -1; >> - start_vp_input->vp_index = cpu; >> + start_vp_input->vp_index = vp_id; >> start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; >> *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; >> >> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h >> index 07aadf0e839f..ae62a34bfd1e 100644 >> --- a/arch/x86/include/asm/mshyperv.h >> +++ b/arch/x86/include/asm/mshyperv.h >> @@ -268,11 +268,11 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct >> hv_interrupt_entry *entry); >> #ifdef CONFIG_AMD_MEM_ENCRYPT >> bool hv_ghcb_negotiate_protocol(void); >> void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); >> -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip); >> +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip); >> #else >> static inline bool hv_ghcb_negotiate_protocol(void) { return false; } >> static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} >> -static inline int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) { return 0; } >> +static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) { return 0; } >> #endif >> >> #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) >> @@ -329,6 +329,7 @@ static inline void hv_set_non_nested_msr(unsigned int reg, >> u64 value) { } >> static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; } >> #endif /* CONFIG_HYPERV */ >> >> +int hv_apicid_to_vp_id(u32 apic_id); >> >> #ifdef CONFIG_HYPERV_VTL_MODE >> void __init hv_vtl_init_platform(void); >> >> base-commit: 628cc040b3a2980df6032766e8ef0688e981ab95 >> -- >> 2.43.0 >> > -- Thank you, Roman ^ permalink raw reply [flat|nested] 13+ messages in thread
* RE: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-25 16:35 ` Roman Kisel @ 2025-04-25 16:55 ` Michael Kelley 2025-04-28 0:48 ` Wei Liu 0 siblings, 1 reply; 13+ messages in thread From: Michael Kelley @ 2025-04-25 16:55 UTC (permalink / raw) To: Roman Kisel, bp@alien8.de, dave.hansen@linux.intel.com, decui@microsoft.com, haiyangz@microsoft.com, hpa@zytor.com, kys@microsoft.com, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, tiala@microsoft.com, wei.liu@kernel.org, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org Cc: apais@microsoft.com, benhill@microsoft.com, bperkins@microsoft.com, sunilmut@microsoft.com From: Roman Kisel <romank@linux.microsoft.com> Sent: Friday, April 25, 2025 9:36 AM > > On 4/25/2025 8:12 AM, Michael Kelley wrote: > > From: Roman Kisel <romank@linux.microsoft.com> Sent: Thursday, April 24, 2025 2:58 PM > >> > >> To start an application processor in SNP-isolated guest, a hypercall > >> is used that takes a virtual processor index. The hv_snp_boot_ap() > >> function uses that START_VP hypercall but passes as VP ID to it what > >> it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. > >> > >> As those two aren't generally interchangeable, that may lead to hung > >> APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be sparse > >> whereas VP IDs never are. > > > > I agree that VP IDs (a.k.a. VP indexes) and APIC IDs don't necessary match, > > and that APIC IDs might be sparse. But I'm not aware of any statement > > in the TLFS about the nature of VP indexes, except that > > > > "A virtual processor index must be less than the maximum number of > > virtual processors per partition." > > > > But that maximum is the Hyper-V implementation maximum, not the > > maximum for a particular VM. So the statement does not imply > > denseness unless the number of CPUs in the VM is equal to the > > Hyper-V implementation max. In other parts of Linux kernel code, > > we assume that VP indexes might be sparse as well. > > > > All that said, this is just a comment about the precise accuracy of > > your commit message, and doesn't affect the code. > > > > I appreciate your help with the precision. I used loose language, > agreed, would like to fix that. The patch was applied though but not yet > sent to the Linus'es tree as I understand. I'd appreciate guidance on > the process! Should I send a v2 nevertheless and explain the situation > in the cover letter? > > IOW, how do I make this easier for the maintainer(s)? Wei Liu should give his preferences. But in the past, I think he has just replaced a patch that was updated. If that's the case, you can send a v2 without a lot of additional explanation. > > >> > >> Update the parameter names to avoid confusion as to what the parameter > >> is. Use the APIC ID to VP ID conversion to provide correct input to the > >> hypercall. > > > > Terminology: The TLFS calls this the "VP Index", not the "VP ID". In > > other Linux code, we also call it the "VP Index". See the hv_vp_index > > array, for example. The exception is the hypercall itself, which the TLFS > > calls HvCallGetVpIndexFromApicId, but which our Linux code calls > > HVCALL_GET_VP_ID_FROM_APIC_ID for some unknown reason. > > > > Could you fix the terminology to be consistent? And maybe fix the > > HVCALL_* string name as well. I know you are just moving the > > existing VTL code, but let's take the opportunity to avoid any > > terminology inconsistency. > > > > I percieved ID as both "index" and "identificator" I guess but maybe > "idx" is more like "index". I'll send out a fix for the terminology, > thanks for your help! Yes, please just call it "vp_index", fully spelled out, as that's consistent with other Linux code for Hyper-V. I briefly got confused because I searched the TLFS for the rules on "vpid" or "vp_id", and found no matches. Then I remembered that it is really "vp index". As for the connotations my brain assigns, "index" is a modest integer suitable for indexing into an array, and the Hyper-V VP index fits that description. OTOH, "id" has a much wider potential meaning, including something as large as a GUID. Of course, given the nature of connotations, other people might have different associations. :-) Michael > > >> > >> Cc: stable@vger.kernel.org > >> Fixes: 44676bb9d566 ("x86/hyperv: Add smp support for SEV-SNP guest") > >> Signed-off-by: Roman Kisel <romank@linux.microsoft.com> > >> --- > >> arch/x86/hyperv/hv_init.c | 33 ++++++++++++++++++++++++++++++++ > >> arch/x86/hyperv/hv_vtl.c | 34 +-------------------------------- > >> arch/x86/hyperv/ivm.c | 11 +++++++++-- > >> arch/x86/include/asm/mshyperv.h | 5 +++-- > >> 4 files changed, 46 insertions(+), 37 deletions(-) > >> > >> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c > >> index ddeb40930bc8..23422342a091 100644 > >> --- a/arch/x86/hyperv/hv_init.c > >> +++ b/arch/x86/hyperv/hv_init.c > >> @@ -706,3 +706,36 @@ bool hv_is_hyperv_initialized(void) > >> return hypercall_msr.enable; > >> } > >> EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); > >> + > >> +int hv_apicid_to_vp_id(u32 apic_id) > >> +{ > >> + u64 control; > >> + u64 status; > >> + unsigned long irq_flags; > >> + struct hv_get_vp_from_apic_id_in *input; > >> + u32 *output, ret; > >> + > >> + local_irq_save(irq_flags); > >> + > >> + input = *this_cpu_ptr(hyperv_pcpu_input_arg); > >> + memset(input, 0, sizeof(*input)); > >> + input->partition_id = HV_PARTITION_ID_SELF; > >> + input->apic_ids[0] = apic_id; > >> + > >> + output = *this_cpu_ptr(hyperv_pcpu_output_arg); > >> + > >> + control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; > >> + status = hv_do_hypercall(control, input, output); > >> + ret = output[0]; > >> + > >> + local_irq_restore(irq_flags); > >> + > >> + if (!hv_result_success(status)) { > >> + pr_err("failed to get vp id from apic id %d, status %#llx\n", > >> + apic_id, status); > >> + return -EINVAL; > >> + } > >> + > >> + return ret; > >> +} > >> +EXPORT_SYMBOL_GPL(hv_apicid_to_vp_id); > >> diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c > >> index 582fe820e29c..8bc4f0121e5e 100644 > >> --- a/arch/x86/hyperv/hv_vtl.c > >> +++ b/arch/x86/hyperv/hv_vtl.c > >> @@ -205,38 +205,6 @@ static int hv_vtl_bringup_vcpu(u32 target_vp_index, int > cpu, u64 eip_ignored) > >> return ret; > >> } > >> > >> -static int hv_vtl_apicid_to_vp_id(u32 apic_id) > >> -{ > >> - u64 control; > >> - u64 status; > >> - unsigned long irq_flags; > >> - struct hv_get_vp_from_apic_id_in *input; > >> - u32 *output, ret; > >> - > >> - local_irq_save(irq_flags); > >> - > >> - input = *this_cpu_ptr(hyperv_pcpu_input_arg); > >> - memset(input, 0, sizeof(*input)); > >> - input->partition_id = HV_PARTITION_ID_SELF; > >> - input->apic_ids[0] = apic_id; > >> - > >> - output = *this_cpu_ptr(hyperv_pcpu_output_arg); > >> - > >> - control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_ID_FROM_APIC_ID; > >> - status = hv_do_hypercall(control, input, output); > >> - ret = output[0]; > >> - > >> - local_irq_restore(irq_flags); > >> - > >> - if (!hv_result_success(status)) { > >> - pr_err("failed to get vp id from apic id %d, status %#llx\n", > >> - apic_id, status); > >> - return -EINVAL; > >> - } > >> - > >> - return ret; > >> -} > >> - > >> static int hv_vtl_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) > >> { > >> int vp_id, cpu; > >> @@ -250,7 +218,7 @@ static int hv_vtl_wakeup_secondary_cpu(u32 apicid, > unsigned > >> long start_eip) > >> return -EINVAL; > >> > >> pr_debug("Bringing up CPU with APIC ID %d in VTL2...\n", apicid); > >> - vp_id = hv_vtl_apicid_to_vp_id(apicid); > >> + vp_id = hv_apicid_to_vp_id(apicid); > >> > >> if (vp_id < 0) { > >> pr_err("Couldn't find CPU with APIC ID %d\n", apicid); > >> diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c > >> index c0039a90e9e0..e3c32bb0d0cf 100644 > >> --- a/arch/x86/hyperv/ivm.c > >> +++ b/arch/x86/hyperv/ivm.c > >> @@ -288,7 +288,7 @@ static void snp_cleanup_vmsa(struct sev_es_save_area > *vmsa) > >> free_page((unsigned long)vmsa); > >> } > >> > >> -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) > >> +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) > >> { > >> struct sev_es_save_area *vmsa = (struct sev_es_save_area *) > >> __get_free_page(GFP_KERNEL | __GFP_ZERO); > >> @@ -297,10 +297,17 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) > >> u64 ret, retry = 5; > >> struct hv_enable_vp_vtl *start_vp_input; > >> unsigned long flags; > >> + int vp_id; > >> > >> if (!vmsa) > >> return -ENOMEM; > >> > >> + vp_id = hv_apicid_to_vp_id(apic_id); > >> + > >> + /* The BSP or an error */ > >> + if (vp_id <= 0) > > > > Returning an error on value 0 may be problematic here. Consider > > the panic case where a CPU other than the BSP takes a panic and > > initiates kdump. If the kdump kernel runs with more than 1 CPU, it > > may try to start the CPU that was originally the BSP. To my > > knowledge, SEV-SNP guests on Hyper-V don't support kdump at > > the moment so this problem is currently theoretical, but let's not > > leave a potential future problem by excluding 0 here. > > > > Also, since I assert that we really don't know anything about the > > VP index values, we can't exclude 0. It may or may not be the > > original BSP. > > > > I believed that the BSP is always 0 yet as long as that's not in TLFS, > that's not true, I agree on that. Probably not this function's job to > check that the processor shouldn't be attempted to start, will fix! > > > Michael > > > >> + return -EINVAL; > >> + > >> native_store_gdt(&gdtr); > >> > >> vmsa->gdtr.base = gdtr.address; > >> @@ -348,7 +355,7 @@ int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) > >> start_vp_input = (struct hv_enable_vp_vtl *)ap_start_input_arg; > >> memset(start_vp_input, 0, sizeof(*start_vp_input)); > >> start_vp_input->partition_id = -1; > >> - start_vp_input->vp_index = cpu; > >> + start_vp_input->vp_index = vp_id; > >> start_vp_input->target_vtl.target_vtl = ms_hyperv.vtl; > >> *(u64 *)&start_vp_input->vp_context = __pa(vmsa) | 1; > >> > >> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h > >> index 07aadf0e839f..ae62a34bfd1e 100644 > >> --- a/arch/x86/include/asm/mshyperv.h > >> +++ b/arch/x86/include/asm/mshyperv.h > >> @@ -268,11 +268,11 @@ int hv_unmap_ioapic_interrupt(int ioapic_id, struct > >> hv_interrupt_entry *entry); > >> #ifdef CONFIG_AMD_MEM_ENCRYPT > >> bool hv_ghcb_negotiate_protocol(void); > >> void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); > >> -int hv_snp_boot_ap(u32 cpu, unsigned long start_ip); > >> +int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip); > >> #else > >> static inline bool hv_ghcb_negotiate_protocol(void) { return false; } > >> static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} > >> -static inline int hv_snp_boot_ap(u32 cpu, unsigned long start_ip) { return 0; } > >> +static inline int hv_snp_boot_ap(u32 apic_id, unsigned long start_ip) { return 0; } > >> #endif > >> > >> #if defined(CONFIG_AMD_MEM_ENCRYPT) || > defined(CONFIG_INTEL_TDX_GUEST) > >> @@ -329,6 +329,7 @@ static inline void hv_set_non_nested_msr(unsigned int reg, > >> u64 value) { } > >> static inline u64 hv_get_non_nested_msr(unsigned int reg) { return 0; } > >> #endif /* CONFIG_HYPERV */ > >> > >> +int hv_apicid_to_vp_id(u32 apic_id); > >> > >> #ifdef CONFIG_HYPERV_VTL_MODE > >> void __init hv_vtl_init_platform(void); > >> > >> base-commit: 628cc040b3a2980df6032766e8ef0688e981ab95 > >> -- > >> 2.43.0 > >> > > > > -- > Thank you, > Roman ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-25 16:55 ` Michael Kelley @ 2025-04-28 0:48 ` Wei Liu 2025-04-28 17:29 ` Roman Kisel 0 siblings, 1 reply; 13+ messages in thread From: Wei Liu @ 2025-04-28 0:48 UTC (permalink / raw) To: Michael Kelley Cc: Roman Kisel, bp@alien8.de, dave.hansen@linux.intel.com, decui@microsoft.com, haiyangz@microsoft.com, hpa@zytor.com, kys@microsoft.com, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, tiala@microsoft.com, wei.liu@kernel.org, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org, apais@microsoft.com, benhill@microsoft.com, bperkins@microsoft.com, sunilmut@microsoft.com On Fri, Apr 25, 2025 at 04:55:42PM +0000, Michael Kelley wrote: > From: Roman Kisel <romank@linux.microsoft.com> Sent: Friday, April 25, 2025 9:36 AM > > > > On 4/25/2025 8:12 AM, Michael Kelley wrote: > > > From: Roman Kisel <romank@linux.microsoft.com> Sent: Thursday, April 24, 2025 2:58 PM > > >> > > >> To start an application processor in SNP-isolated guest, a hypercall > > >> is used that takes a virtual processor index. The hv_snp_boot_ap() > > >> function uses that START_VP hypercall but passes as VP ID to it what > > >> it receives as a wakeup_secondary_cpu_64 callback: the APIC ID. > > >> > > >> As those two aren't generally interchangeable, that may lead to hung > > >> APs if VP IDs and APIC IDs don't match, e.g. APIC IDs might be sparse > > >> whereas VP IDs never are. > > > > > > I agree that VP IDs (a.k.a. VP indexes) and APIC IDs don't necessary match, > > > and that APIC IDs might be sparse. But I'm not aware of any statement > > > in the TLFS about the nature of VP indexes, except that > > > > > > "A virtual processor index must be less than the maximum number of > > > virtual processors per partition." > > > > > > But that maximum is the Hyper-V implementation maximum, not the > > > maximum for a particular VM. So the statement does not imply > > > denseness unless the number of CPUs in the VM is equal to the > > > Hyper-V implementation max. In other parts of Linux kernel code, > > > we assume that VP indexes might be sparse as well. > > > > > > All that said, this is just a comment about the precise accuracy of > > > your commit message, and doesn't affect the code. > > > > > > > I appreciate your help with the precision. I used loose language, > > agreed, would like to fix that. The patch was applied though but not yet > > sent to the Linus'es tree as I understand. I'd appreciate guidance on > > the process! Should I send a v2 nevertheless and explain the situation > > in the cover letter? > > > > IOW, how do I make this easier for the maintainer(s)? > > Wei Liu should give his preferences. But in the past, I think he has > just replaced a patch that was updated. If that's the case, you can > send a v2 without a lot of additional explanation. > Normally if you need to send a new version because the original patch is buggy, you can just update your patch. If only a commit message or comment needs to be updated, I will let the submitter know either to send a new version or not. Sometimes I will just make the changes myself to save the submitter some time. Wei. ^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() 2025-04-28 0:48 ` Wei Liu @ 2025-04-28 17:29 ` Roman Kisel 0 siblings, 0 replies; 13+ messages in thread From: Roman Kisel @ 2025-04-28 17:29 UTC (permalink / raw) To: Wei Liu, Michael Kelley Cc: bp@alien8.de, dave.hansen@linux.intel.com, decui@microsoft.com, haiyangz@microsoft.com, hpa@zytor.com, kys@microsoft.com, mikelley@microsoft.com, mingo@redhat.com, tglx@linutronix.de, tiala@microsoft.com, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, x86@kernel.org, apais@microsoft.com, benhill@microsoft.com, bperkins@microsoft.com, sunilmut@microsoft.com On 4/27/2025 5:48 PM, Wei Liu wrote: > On Fri, Apr 25, 2025 at 04:55:42PM +0000, Michael Kelley wrote: [...] >>> >>> I appreciate your help with the precision. I used loose language, >>> agreed, would like to fix that. The patch was applied though but not yet >>> sent to the Linus'es tree as I understand. I'd appreciate guidance on >>> the process! Should I send a v2 nevertheless and explain the situation >>> in the cover letter? >>> >>> IOW, how do I make this easier for the maintainer(s)? >> >> Wei Liu should give his preferences. But in the past, I think he has >> just replaced a patch that was updated. If that's the case, you can >> send a v2 without a lot of additional explanation. >> > > Normally if you need to send a new version because the original > patch is buggy, you can just update your patch. > > If only a commit message or comment needs to be updated, I will let the > submitter know either to send a new version or not. Sometimes I will > just make the changes myself to save the submitter some time. > Very kind of you, thanks a lot for the explanation! > Wei. -- Thank you, Roman ^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2025-04-28 17:29 UTC | newest] Thread overview: 13+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2025-04-24 21:57 [PATCH hyperv-next] x86/hyperv: Fix APIC ID and VP ID confusion in hv_snp_boot_ap() Roman Kisel 2025-04-25 6:15 ` Wei Liu 2025-04-25 9:14 ` [EXTERNAL] " Saurabh Singh Sengar 2025-04-25 16:43 ` Roman Kisel 2025-04-25 17:18 ` Saurabh Singh Sengar 2025-04-25 18:22 ` Roman Kisel 2025-04-25 21:12 ` Wei Liu 2025-04-28 17:26 ` Roman Kisel 2025-04-25 15:12 ` Michael Kelley 2025-04-25 16:35 ` Roman Kisel 2025-04-25 16:55 ` Michael Kelley 2025-04-28 0:48 ` Wei Liu 2025-04-28 17:29 ` Roman Kisel
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).