kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 0/2] KVM: SVM: Add support for 4096 vcpus with x2AVIC
@ 2025-02-20  7:38 Naveen N Rao (AMD)
  2025-02-20  7:38 ` [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus Naveen N Rao (AMD)
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Naveen N Rao (AMD) @ 2025-02-20  7:38 UTC (permalink / raw)
  To: kvm, linux-kernel
  Cc: Sean Christopherson, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

This is v3 of the series posted at:
http://lkml.kernel.org/r/cover.1738563890.git.naveen@kernel.org

The first patch adds support for up to 4096 vcpus with x2AVIC, while the 
second patch limits the value that is programmed into 
AVIC_PHYSICAL_MAX_INDEX in the VMCB based on the max APIC ID indicated 
by the VMM.

Changes since v2:
- Patch 1: Free allocated pages in avic_vm_destroy()
- Patch 2: Rename x2apic_mode parameter of avic_get_max_physical_id() to 
  just x2apic to avoid build issue with similarly named global variable.


- Naveen


Naveen N Rao (AMD) (1):
  KVM: SVM: Limit AVIC physical max index based on configured
    max_vcpu_ids

Suravee Suthikulpanit (1):
  KVM: SVM: Increase X2AVIC limit to 4096 vcpus

 arch/x86/include/asm/svm.h |  4 ++
 arch/x86/kvm/svm/avic.c    | 82 ++++++++++++++++++++++++++++----------
 arch/x86/kvm/svm/svm.c     |  6 +++
 arch/x86/kvm/svm/svm.h     |  1 +
 4 files changed, 73 insertions(+), 20 deletions(-)


base-commit: fed48e2967f402f561d80075a20c5c9e16866e53
-- 
2.48.1


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus
  2025-02-20  7:38 [PATCH v3 0/2] KVM: SVM: Add support for 4096 vcpus with x2AVIC Naveen N Rao (AMD)
@ 2025-02-20  7:38 ` Naveen N Rao (AMD)
  2025-06-23 23:17   ` Sean Christopherson
  2025-02-20  7:38 ` [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids Naveen N Rao (AMD)
  2025-03-20  5:34 ` [PATCH v3 0/2] KVM: SVM: Add support for 4096 vcpus with x2AVIC Vasant Hegde
  2 siblings, 1 reply; 14+ messages in thread
From: Naveen N Rao (AMD) @ 2025-02-20  7:38 UTC (permalink / raw)
  To: kvm, linux-kernel
  Cc: Sean Christopherson, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Newer AMD platforms enhance x2AVIC feature to support up to 4096 vcpus.
This capatility is detected via CPUID_Fn8000000A_ECX[x2AVIC_EXT].

Modify the SVM driver to check the capability. If detected, extend bitmask
for guest max physical APIC ID to 0xFFF, increase maximum vcpu index to
4095, and increase the size of the Phyical APIC ID table from 4K to 32K in
order to accommodate up to 4096 entries.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 arch/x86/include/asm/svm.h |  4 ++++
 arch/x86/kvm/svm/avic.c    | 45 ++++++++++++++++++++++++++------------
 2 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index e2fac21471f5..4ff5b2f767e1 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -268,6 +268,7 @@ enum avic_ipi_failure_cause {
 };
 
 #define AVIC_PHYSICAL_MAX_INDEX_MASK	GENMASK_ULL(8, 0)
+#define AVIC_PHYSICAL_MAX_INDEX_4K_MASK	GENMASK_ULL(11, 0)
 
 /*
  * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as
@@ -277,11 +278,14 @@ enum avic_ipi_failure_cause {
 
 /*
  * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511).
+ * For extended x2AVIC, the max index allowed for physical APIC ID table is 0xfff (4095).
  */
 #define X2AVIC_MAX_PHYSICAL_ID		0x1FFUL
+#define X2AVIC_MAX_PHYSICAL_ID_4K	0xFFFUL
 
 static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
 static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
+static_assert((X2AVIC_MAX_PHYSICAL_ID_4K & AVIC_PHYSICAL_MAX_INDEX_4K_MASK) == X2AVIC_MAX_PHYSICAL_ID_4K);
 
 #define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
 
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 65fd245a9953..1fb322d2ac18 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -38,9 +38,9 @@
  * size of the GATag is defined by hardware (32 bits), but is an opaque value
  * as far as hardware is concerned.
  */
-#define AVIC_VCPU_ID_MASK		AVIC_PHYSICAL_MAX_INDEX_MASK
+#define AVIC_VCPU_ID_MASK		AVIC_PHYSICAL_MAX_INDEX_4K_MASK
 
-#define AVIC_VM_ID_SHIFT		HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_MASK)
+#define AVIC_VM_ID_SHIFT		HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_4K_MASK)
 #define AVIC_VM_ID_MASK			(GENMASK(31, AVIC_VM_ID_SHIFT) >> AVIC_VM_ID_SHIFT)
 
 #define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VM_ID_SHIFT) & AVIC_VM_ID_MASK)
@@ -73,6 +73,9 @@ static u32 next_vm_id = 0;
 static bool next_vm_id_wrapped = 0;
 static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
 bool x2avic_enabled;
+static bool x2avic_4k_vcpu_supported;
+static u64 x2avic_max_physical_id;
+static u64 avic_physical_max_index_mask;
 
 /*
  * This is a wrapper of struct amd_iommu_ir_data.
@@ -87,7 +90,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
 	struct vmcb *vmcb = svm->vmcb01.ptr;
 
 	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
-	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
+	vmcb->control.avic_physical_id &= ~avic_physical_max_index_mask;
 
 	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
 
@@ -100,7 +103,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
 	 */
 	if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
 		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
-		vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
+		vmcb->control.avic_physical_id |= x2avic_max_physical_id;
 		/* Disabling MSR intercept for x2APIC registers */
 		svm_set_x2apic_msr_interception(svm, false);
 	} else {
@@ -122,7 +125,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm)
 	struct vmcb *vmcb = svm->vmcb01.ptr;
 
 	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
-	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
+	vmcb->control.avic_physical_id &= ~avic_physical_max_index_mask;
 
 	/*
 	 * If running nested and the guest uses its own MSR bitmap, there
@@ -182,7 +185,8 @@ void avic_vm_destroy(struct kvm *kvm)
 	if (kvm_svm->avic_logical_id_table_page)
 		__free_page(kvm_svm->avic_logical_id_table_page);
 	if (kvm_svm->avic_physical_id_table_page)
-		__free_page(kvm_svm->avic_physical_id_table_page);
+		__free_pages(kvm_svm->avic_physical_id_table_page,
+			     get_order(sizeof(u64) * (x2avic_max_physical_id + 1)));
 
 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 	hash_del(&kvm_svm->hnode);
@@ -197,13 +201,15 @@ int avic_vm_init(struct kvm *kvm)
 	struct kvm_svm *k2;
 	struct page *p_page;
 	struct page *l_page;
-	u32 vm_id;
+	u32 vm_id, entries;
 
 	if (!enable_apicv)
 		return 0;
 
-	/* Allocating physical APIC ID table (4KB) */
-	p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	/* Allocating physical APIC ID table */
+	entries = x2avic_max_physical_id + 1;
+	p_page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+			     get_order(sizeof(u64) * entries));
 	if (!p_page)
 		goto free_avic;
 
@@ -266,7 +272,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
 
 	if ((!x2avic_enabled && index > AVIC_MAX_PHYSICAL_ID) ||
-	    (index > X2AVIC_MAX_PHYSICAL_ID))
+	    (index > x2avic_max_physical_id))
 		return NULL;
 
 	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
@@ -281,7 +287,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
-	    (id > X2AVIC_MAX_PHYSICAL_ID))
+	    (id > x2avic_max_physical_id))
 		return -EINVAL;
 
 	if (!vcpu->arch.apic->regs)
@@ -493,7 +499,7 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
 	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
 	u32 icrl = svm->vmcb->control.exit_info_1;
 	u32 id = svm->vmcb->control.exit_info_2 >> 32;
-	u32 index = svm->vmcb->control.exit_info_2 & 0x1FF;
+	u32 index = svm->vmcb->control.exit_info_2 & avic_physical_max_index_mask;
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
@@ -1218,8 +1224,19 @@ bool avic_hardware_setup(void)
 
 	/* AVIC is a prerequisite for x2AVIC. */
 	x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
-	if (x2avic_enabled)
-		pr_info("x2AVIC enabled\n");
+	if (x2avic_enabled) {
+		x2avic_4k_vcpu_supported = !!(cpuid_ecx(0x8000000a) & 0x40);
+		if (x2avic_4k_vcpu_supported) {
+			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
+			avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_4K_MASK;
+		} else {
+			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
+			avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_MASK;
+		}
+
+		pr_info("x2AVIC enabled%s\n",
+			x2avic_4k_vcpu_supported ? " (w/ 4K-vcpu)" : "");
+	}
 
 	amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
 
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids
  2025-02-20  7:38 [PATCH v3 0/2] KVM: SVM: Add support for 4096 vcpus with x2AVIC Naveen N Rao (AMD)
  2025-02-20  7:38 ` [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus Naveen N Rao (AMD)
@ 2025-02-20  7:38 ` Naveen N Rao (AMD)
  2025-02-20 13:36   ` Gupta, Pankaj
  2025-06-24  0:38   ` Sean Christopherson
  2025-03-20  5:34 ` [PATCH v3 0/2] KVM: SVM: Add support for 4096 vcpus with x2AVIC Vasant Hegde
  2 siblings, 2 replies; 14+ messages in thread
From: Naveen N Rao (AMD) @ 2025-02-20  7:38 UTC (permalink / raw)
  To: kvm, linux-kernel
  Cc: Sean Christopherson, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

KVM allows VMMs to specify the maximum possible APIC ID for a virtual
machine through KVM_CAP_MAX_VCPU_ID capability so as to limit data
structures related to APIC/x2APIC. Utilize the same to set the AVIC
physical max index in the VMCB, similar to VMX. This helps hardware
limit the number of entries to be scanned in the physical APIC ID table
speeding up IPI broadcasts for virtual machines with smaller number of
vcpus.

The minimum allocation required for the Physical APIC ID table is one 4k
page supporting up to 512 entries. With AVIC support for 4096 vcpus
though, it is sufficient to only allocate memory to accommodate the
AVIC physical max index that will be programmed into the VMCB. Limit
memory allocated for the Physical APIC ID table accordingly.

Signed-off-by: Naveen N Rao (AMD) <naveen@kernel.org>
---
 arch/x86/kvm/svm/avic.c | 53 ++++++++++++++++++++++++++++++-----------
 arch/x86/kvm/svm/svm.c  |  6 +++++
 arch/x86/kvm/svm/svm.h  |  1 +
 3 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 1fb322d2ac18..dac4a6648919 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -85,6 +85,17 @@ struct amd_svm_iommu_ir {
 	void *data;		/* Storing pointer to struct amd_ir_data */
 };
 
+static inline u32 avic_get_max_physical_id(struct kvm *kvm, bool is_x2apic)
+{
+	u32 avic_max_physical_id = is_x2apic ? x2avic_max_physical_id : AVIC_MAX_PHYSICAL_ID;
+
+	/*
+	 * Assume vcpu_id is the same as APIC ID. Per KVM_CAP_MAX_VCPU_ID, max_vcpu_ids
+	 * represents the max APIC ID for this vm, rather than the max vcpus.
+	 */
+	return min(kvm->arch.max_vcpu_ids - 1, avic_max_physical_id);
+}
+
 static void avic_activate_vmcb(struct vcpu_svm *svm)
 {
 	struct vmcb *vmcb = svm->vmcb01.ptr;
@@ -103,7 +114,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
 	 */
 	if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
 		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
-		vmcb->control.avic_physical_id |= x2avic_max_physical_id;
+		vmcb->control.avic_physical_id |= avic_get_max_physical_id(svm->vcpu.kvm, true);
 		/* Disabling MSR intercept for x2APIC registers */
 		svm_set_x2apic_msr_interception(svm, false);
 	} else {
@@ -114,7 +125,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
 		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
 
 		/* For xAVIC and hybrid-xAVIC modes */
-		vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
+		vmcb->control.avic_physical_id |= avic_get_max_physical_id(svm->vcpu.kvm, false);
 		/* Enabling MSR intercept for x2APIC registers */
 		svm_set_x2apic_msr_interception(svm, true);
 	}
@@ -174,6 +185,12 @@ int avic_ga_log_notifier(u32 ga_tag)
 	return 0;
 }
 
+static inline int avic_get_physical_id_table_order(struct kvm *kvm)
+{
+	/* Limit to the maximum physical ID supported in x2avic mode */
+	return get_order((avic_get_max_physical_id(kvm, true) + 1) * sizeof(u64));
+}
+
 void avic_vm_destroy(struct kvm *kvm)
 {
 	unsigned long flags;
@@ -186,7 +203,7 @@ void avic_vm_destroy(struct kvm *kvm)
 		__free_page(kvm_svm->avic_logical_id_table_page);
 	if (kvm_svm->avic_physical_id_table_page)
 		__free_pages(kvm_svm->avic_physical_id_table_page,
-			     get_order(sizeof(u64) * (x2avic_max_physical_id + 1)));
+			     avic_get_physical_id_table_order(kvm));
 
 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 	hash_del(&kvm_svm->hnode);
@@ -199,22 +216,12 @@ int avic_vm_init(struct kvm *kvm)
 	int err = -ENOMEM;
 	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
 	struct kvm_svm *k2;
-	struct page *p_page;
 	struct page *l_page;
-	u32 vm_id, entries;
+	u32 vm_id;
 
 	if (!enable_apicv)
 		return 0;
 
-	/* Allocating physical APIC ID table */
-	entries = x2avic_max_physical_id + 1;
-	p_page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
-			     get_order(sizeof(u64) * entries));
-	if (!p_page)
-		goto free_avic;
-
-	kvm_svm->avic_physical_id_table_page = p_page;
-
 	/* Allocating logical APIC ID table (4KB) */
 	l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 	if (!l_page)
@@ -265,6 +272,24 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
 		avic_deactivate_vmcb(svm);
 }
 
+int avic_alloc_physical_id_table(struct kvm *kvm)
+{
+	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
+	struct page *p_page;
+
+	if (kvm_svm->avic_physical_id_table_page || !enable_apicv || !irqchip_in_kernel(kvm))
+		return 0;
+
+	p_page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+			     avic_get_physical_id_table_order(kvm));
+	if (!p_page)
+		return -ENOMEM;
+
+	kvm_svm->avic_physical_id_table_page = p_page;
+
+	return 0;
+}
+
 static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 				       unsigned int index)
 {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b8aa0f36850f..3cb23298cdc3 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1423,6 +1423,11 @@ void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
 	svm->vmcb = target_vmcb->ptr;
 }
 
+static int svm_vcpu_precreate(struct kvm *kvm)
+{
+	return avic_alloc_physical_id_table(kvm);
+}
+
 static int svm_vcpu_create(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm;
@@ -5007,6 +5012,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu,
 	.has_emulated_msr = svm_has_emulated_msr,
 
+	.vcpu_precreate = svm_vcpu_precreate,
 	.vcpu_create = svm_vcpu_create,
 	.vcpu_free = svm_vcpu_free,
 	.vcpu_reset = svm_vcpu_reset,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5b159f017055..b4670afe0034 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -694,6 +694,7 @@ bool avic_hardware_setup(void);
 int avic_ga_log_notifier(u32 ga_tag);
 void avic_vm_destroy(struct kvm *kvm);
 int avic_vm_init(struct kvm *kvm);
+int avic_alloc_physical_id_table(struct kvm *kvm);
 void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
 int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
 int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
-- 
2.48.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids
  2025-02-20  7:38 ` [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids Naveen N Rao (AMD)
@ 2025-02-20 13:36   ` Gupta, Pankaj
  2025-06-24  0:38   ` Sean Christopherson
  1 sibling, 0 replies; 14+ messages in thread
From: Gupta, Pankaj @ 2025-02-20 13:36 UTC (permalink / raw)
  To: Naveen N Rao (AMD), kvm, linux-kernel
  Cc: Sean Christopherson, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

On 2/20/2025 8:38 AM, Naveen N Rao (AMD) wrote:
> KVM allows VMMs to specify the maximum possible APIC ID for a virtual
> machine through KVM_CAP_MAX_VCPU_ID capability so as to limit data
> structures related to APIC/x2APIC. Utilize the same to set the AVIC
> physical max index in the VMCB, similar to VMX. This helps hardware
> limit the number of entries to be scanned in the physical APIC ID table
> speeding up IPI broadcasts for virtual machines with smaller number of
> vcpus.
> 
> The minimum allocation required for the Physical APIC ID table is one 4k
> page supporting up to 512 entries. With AVIC support for 4096 vcpus
> though, it is sufficient to only allocate memory to accommodate the
> AVIC physical max index that will be programmed into the VMCB. Limit
> memory allocated for the Physical APIC ID table accordingly.
> 
> Signed-off-by: Naveen N Rao (AMD) <naveen@kernel.org>
Looks good to me. Liked the optimal solution.

Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com>


> ---
>   arch/x86/kvm/svm/avic.c | 53 ++++++++++++++++++++++++++++++-----------
>   arch/x86/kvm/svm/svm.c  |  6 +++++
>   arch/x86/kvm/svm/svm.h  |  1 +
>   3 files changed, 46 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 1fb322d2ac18..dac4a6648919 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -85,6 +85,17 @@ struct amd_svm_iommu_ir {
>   	void *data;		/* Storing pointer to struct amd_ir_data */
>   };
>   
> +static inline u32 avic_get_max_physical_id(struct kvm *kvm, bool is_x2apic)
> +{
> +	u32 avic_max_physical_id = is_x2apic ? x2avic_max_physical_id : AVIC_MAX_PHYSICAL_ID;
> +
> +	/*
> +	 * Assume vcpu_id is the same as APIC ID. Per KVM_CAP_MAX_VCPU_ID, max_vcpu_ids
> +	 * represents the max APIC ID for this vm, rather than the max vcpus.
> +	 */
> +	return min(kvm->arch.max_vcpu_ids - 1, avic_max_physical_id);
> +}
> +
>   static void avic_activate_vmcb(struct vcpu_svm *svm)
>   {
>   	struct vmcb *vmcb = svm->vmcb01.ptr;
> @@ -103,7 +114,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
>   	 */
>   	if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
>   		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
> -		vmcb->control.avic_physical_id |= x2avic_max_physical_id;
> +		vmcb->control.avic_physical_id |= avic_get_max_physical_id(svm->vcpu.kvm, true);
>   		/* Disabling MSR intercept for x2APIC registers */
>   		svm_set_x2apic_msr_interception(svm, false);
>   	} else {
> @@ -114,7 +125,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
>   		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
>   
>   		/* For xAVIC and hybrid-xAVIC modes */
> -		vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
> +		vmcb->control.avic_physical_id |= avic_get_max_physical_id(svm->vcpu.kvm, false);
>   		/* Enabling MSR intercept for x2APIC registers */
>   		svm_set_x2apic_msr_interception(svm, true);
>   	}
> @@ -174,6 +185,12 @@ int avic_ga_log_notifier(u32 ga_tag)
>   	return 0;
>   }
>   
> +static inline int avic_get_physical_id_table_order(struct kvm *kvm)
> +{
> +	/* Limit to the maximum physical ID supported in x2avic mode */
> +	return get_order((avic_get_max_physical_id(kvm, true) + 1) * sizeof(u64));
> +}
> +
>   void avic_vm_destroy(struct kvm *kvm)
>   {
>   	unsigned long flags;
> @@ -186,7 +203,7 @@ void avic_vm_destroy(struct kvm *kvm)
>   		__free_page(kvm_svm->avic_logical_id_table_page);
>   	if (kvm_svm->avic_physical_id_table_page)
>   		__free_pages(kvm_svm->avic_physical_id_table_page,
> -			     get_order(sizeof(u64) * (x2avic_max_physical_id + 1)));
> +			     avic_get_physical_id_table_order(kvm));
>   
>   	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
>   	hash_del(&kvm_svm->hnode);
> @@ -199,22 +216,12 @@ int avic_vm_init(struct kvm *kvm)
>   	int err = -ENOMEM;
>   	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
>   	struct kvm_svm *k2;
> -	struct page *p_page;
>   	struct page *l_page;
> -	u32 vm_id, entries;
> +	u32 vm_id;
>   
>   	if (!enable_apicv)
>   		return 0;
>   
> -	/* Allocating physical APIC ID table */
> -	entries = x2avic_max_physical_id + 1;
> -	p_page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
> -			     get_order(sizeof(u64) * entries));
> -	if (!p_page)
> -		goto free_avic;
> -
> -	kvm_svm->avic_physical_id_table_page = p_page;
> -
>   	/* Allocating logical APIC ID table (4KB) */
>   	l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
>   	if (!l_page)
> @@ -265,6 +272,24 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
>   		avic_deactivate_vmcb(svm);
>   }
>   
> +int avic_alloc_physical_id_table(struct kvm *kvm)
> +{
> +	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
> +	struct page *p_page;
> +
> +	if (kvm_svm->avic_physical_id_table_page || !enable_apicv || !irqchip_in_kernel(kvm))
> +		return 0;
> +
> +	p_page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
> +			     avic_get_physical_id_table_order(kvm));
> +	if (!p_page)
> +		return -ENOMEM;
> +
> +	kvm_svm->avic_physical_id_table_page = p_page;
> +
> +	return 0;
> +}
> +
>   static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
>   				       unsigned int index)
>   {
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index b8aa0f36850f..3cb23298cdc3 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1423,6 +1423,11 @@ void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
>   	svm->vmcb = target_vmcb->ptr;
>   }
>   
> +static int svm_vcpu_precreate(struct kvm *kvm)
> +{
> +	return avic_alloc_physical_id_table(kvm);
> +}
> +
>   static int svm_vcpu_create(struct kvm_vcpu *vcpu)
>   {
>   	struct vcpu_svm *svm;
> @@ -5007,6 +5012,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>   	.emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu,
>   	.has_emulated_msr = svm_has_emulated_msr,
>   
> +	.vcpu_precreate = svm_vcpu_precreate,
>   	.vcpu_create = svm_vcpu_create,
>   	.vcpu_free = svm_vcpu_free,
>   	.vcpu_reset = svm_vcpu_reset,
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 5b159f017055..b4670afe0034 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -694,6 +694,7 @@ bool avic_hardware_setup(void);
>   int avic_ga_log_notifier(u32 ga_tag);
>   void avic_vm_destroy(struct kvm *kvm);
>   int avic_vm_init(struct kvm *kvm);
> +int avic_alloc_physical_id_table(struct kvm *kvm);
>   void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
>   int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
>   int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 0/2] KVM: SVM: Add support for 4096 vcpus with x2AVIC
  2025-02-20  7:38 [PATCH v3 0/2] KVM: SVM: Add support for 4096 vcpus with x2AVIC Naveen N Rao (AMD)
  2025-02-20  7:38 ` [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus Naveen N Rao (AMD)
  2025-02-20  7:38 ` [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids Naveen N Rao (AMD)
@ 2025-03-20  5:34 ` Vasant Hegde
  2 siblings, 0 replies; 14+ messages in thread
From: Vasant Hegde @ 2025-03-20  5:34 UTC (permalink / raw)
  To: Naveen N Rao (AMD), kvm, linux-kernel
  Cc: Sean Christopherson, Paolo Bonzini, Suravee Suthikulpanit

On 2/20/2025 1:08 PM, Naveen N Rao (AMD) wrote:
> This is v3 of the series posted at:
> http://lkml.kernel.org/r/cover.1738563890.git.naveen@kernel.org
> 
> The first patch adds support for up to 4096 vcpus with x2AVIC, while the 
> second patch limits the value that is programmed into 
> AVIC_PHYSICAL_MAX_INDEX in the VMCB based on the max APIC ID indicated 
> by the VMM.

Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>

-Vasant


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus
  2025-02-20  7:38 ` [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus Naveen N Rao (AMD)
@ 2025-06-23 23:17   ` Sean Christopherson
  2025-07-18 10:21     ` Naveen N Rao
  0 siblings, 1 reply; 14+ messages in thread
From: Sean Christopherson @ 2025-06-23 23:17 UTC (permalink / raw)
  To: Naveen N Rao (AMD)
  Cc: kvm, linux-kernel, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

On Thu, Feb 20, 2025, Naveen N Rao (AMD) wrote:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> 
> Newer AMD platforms enhance x2AVIC feature to support up to 4096 vcpus.
> This capatility is detected via CPUID_Fn8000000A_ECX[x2AVIC_EXT].
> 
> Modify the SVM driver to check the capability. If detected, extend bitmask
> for guest max physical APIC ID to 0xFFF, increase maximum vcpu index to
> 4095, and increase the size of the Phyical APIC ID table from 4K to 32K in
> order to accommodate up to 4096 entries.

Kinda silly, but please split this into (at least) two patches.  One to introduce
the variables to replace the macros, and then one to actually implement support
for 4096 entries.  That makes it a _lot_ easier to review each change (I'm having
trouble teasing out what's actually changing for 4k support).

The changelog also needs more info.  Unless I'm misreading the diff, only the
physical table is being expanded?  How does that work?  (I might be able to
figure it out if I think hard, but I shouldn't have to think that hard).

> @@ -182,7 +185,8 @@ void avic_vm_destroy(struct kvm *kvm)
>  	if (kvm_svm->avic_logical_id_table_page)
>  		__free_page(kvm_svm->avic_logical_id_table_page);
>  	if (kvm_svm->avic_physical_id_table_page)
> -		__free_page(kvm_svm->avic_physical_id_table_page);
> +		__free_pages(kvm_svm->avic_physical_id_table_page,
> +			     get_order(sizeof(u64) * (x2avic_max_physical_id + 1)));

The order should be encapsulated in some way, e.g. through a global or a helper.

> @@ -1218,8 +1224,19 @@ bool avic_hardware_setup(void)
>  
>  	/* AVIC is a prerequisite for x2AVIC. */
>  	x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
> -	if (x2avic_enabled)
> -		pr_info("x2AVIC enabled\n");
> +	if (x2avic_enabled) {
> +		x2avic_4k_vcpu_supported = !!(cpuid_ecx(0x8000000a) & 0x40);

No, add an X86_FEATURE_xxx for this, don't open code the CPUID lookup.  I think
I'd even be tempted to use helpers instead of  

> +		if (x2avic_4k_vcpu_supported) {
> +			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
> +			avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_4K_MASK;
> +		} else {
> +			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
> +			avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_MASK;
> +		}
> +
> +		pr_info("x2AVIC enabled%s\n",
> +			x2avic_4k_vcpu_supported ? " (w/ 4K-vcpu)" : "");

Maybe print the max number of vCPUs that are supported?  That way there is clear
signal when 4k *isn't* supported (and communicating the max number of vCPUs in
the !4k case would be helpful too).

> +	}
>  
>  	amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
>  
> -- 
> 2.48.1
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids
  2025-02-20  7:38 ` [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids Naveen N Rao (AMD)
  2025-02-20 13:36   ` Gupta, Pankaj
@ 2025-06-24  0:38   ` Sean Christopherson
  2025-07-18 10:34     ` Naveen N Rao
  1 sibling, 1 reply; 14+ messages in thread
From: Sean Christopherson @ 2025-06-24  0:38 UTC (permalink / raw)
  To: Naveen N Rao (AMD)
  Cc: kvm, linux-kernel, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

On Thu, Feb 20, 2025, Naveen N Rao (AMD) wrote:
> KVM allows VMMs to specify the maximum possible APIC ID for a virtual
> machine through KVM_CAP_MAX_VCPU_ID capability so as to limit data
> structures related to APIC/x2APIC. Utilize the same to set the AVIC
> physical max index in the VMCB, similar to VMX. This helps hardware
> limit the number of entries to be scanned in the physical APIC ID table
> speeding up IPI broadcasts for virtual machines with smaller number of
> vcpus.
> 
> The minimum allocation required for the Physical APIC ID table is one 4k
> page supporting up to 512 entries. With AVIC support for 4096 vcpus
> though, it is sufficient to only allocate memory to accommodate the
> AVIC physical max index that will be programmed into the VMCB. Limit
> memory allocated for the Physical APIC ID table accordingly.

Can you flip the order of the patches?  This seems like an easy "win" for
performance, and so I can see people wanting to backport this to random kernels
even if they don't care about running 4k vCPUs.

Speaking of which, is there a measurable performance win?

> Signed-off-by: Naveen N Rao (AMD) <naveen@kernel.org>
> ---
>  arch/x86/kvm/svm/avic.c | 53 ++++++++++++++++++++++++++++++-----------
>  arch/x86/kvm/svm/svm.c  |  6 +++++
>  arch/x86/kvm/svm/svm.h  |  1 +
>  3 files changed, 46 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> index 1fb322d2ac18..dac4a6648919 100644
> --- a/arch/x86/kvm/svm/avic.c
> +++ b/arch/x86/kvm/svm/avic.c
> @@ -85,6 +85,17 @@ struct amd_svm_iommu_ir {
>  	void *data;		/* Storing pointer to struct amd_ir_data */
>  };
>  
> +static inline u32 avic_get_max_physical_id(struct kvm *kvm, bool is_x2apic)

Formletter incoming...

Do not use "inline" for functions that are visible only to the local compilation
unit.  "inline" is just a hint, and modern compilers are smart enough to inline
functions when appropriate without a hint.

A longer explanation/rant here: https://lore.kernel.org/all/ZAdfX+S323JVWNZC@google.com

> +{
> +	u32 avic_max_physical_id = is_x2apic ? x2avic_max_physical_id : AVIC_MAX_PHYSICAL_ID;

Don't use a super long local variable.  For a helper like this, it's unnecessary,
e.g. if the reader can't understand what arch_max or max_id is, then spelling it
out entirely probably won't help them.

And practically, there's a danger to using long names like this: you're much more
likely to unintentionally "shadow" a global variable.  Functionally, it won't be
a problem, but it can create confusion.  E.g. if we ever added a global
avic_max_physical_id, then this code would get rather confusing.

> +
> +	/*
> +	 * Assume vcpu_id is the same as APIC ID. Per KVM_CAP_MAX_VCPU_ID, max_vcpu_ids
> +	 * represents the max APIC ID for this vm, rather than the max vcpus.
> +	 */
> +	return min(kvm->arch.max_vcpu_ids - 1, avic_max_physical_id);
> +}
> +
>  static void avic_activate_vmcb(struct vcpu_svm *svm)
>  {
>  	struct vmcb *vmcb = svm->vmcb01.ptr;
> @@ -103,7 +114,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
>  	 */
>  	if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
>  		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
> -		vmcb->control.avic_physical_id |= x2avic_max_physical_id;
> +		vmcb->control.avic_physical_id |= avic_get_max_physical_id(svm->vcpu.kvm, true);

Don't pass hardcoded booleans when it is at all possible to do something else.
For this case, I would either do:

  static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu)
  {
	u32 arch_max;
	
	if (x2avic_enabled && apic_x2apic_mode(vcpu->arch.apic))
		arch_max = x2avic_max_physical_id;
	else
		arch_max = AVIC_MAX_PHYSICAL_ID;

	return min(kvm->arch.max_vcpu_ids - 1, arch_max);
  }

  static void avic_activate_vmcb(struct vcpu_svm *svm)
  {
	struct vmcb *vmcb = svm->vmcb01.ptr;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);

	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
	vmcb->control.avic_physical_id |= avic_get_max_physical_id(vcpu);

	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;

	/*
	 * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
	 * accesses, while interrupt injection to a running vCPU can be
	 * achieved using AVIC doorbell.  KVM disables the APIC access page
	 * (deletes the memslot) if any vCPU has x2APIC enabled, thus enabling
	 * AVIC in hybrid mode activates only the doorbell mechanism.
	 */
	if (x2avic_enabled && apic_x2apic_mode(vcpu->arch.apic)) {
		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
		/* Disabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, false);
	} else {
		/*
		 * Flush the TLB, the guest may have inserted a non-APIC
		 * mapping into the TLB while AVIC was disabled.
		 */
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);

		/* Enabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, true);
	}
  }

or


  static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu, u32 arch_max)
  {
	return min(kvm->arch.max_vcpu_ids - 1, arch_max);
  }

  static void avic_activate_vmcb(struct vcpu_svm *svm)
  {
	struct vmcb *vmcb = svm->vmcb01.ptr;
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u32 max_id;

	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;

	/*
	 * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
	 * accesses, while interrupt injection to a running vCPU can be
	 * achieved using AVIC doorbell.  KVM disables the APIC access page
	 * (deletes the memslot) if any vCPU has x2APIC enabled, thus enabling
	 * AVIC in hybrid mode activates only the doorbell mechanism.
	 */
	if (x2avic_enabled && apic_x2apic_mode(vcpu->arch.apic)) {
		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
		max_id = avic_get_max_physical_id(vcpu, x2avic_max_physical_id);

		/* Disabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, false);
	} else {
		max_id = avic_get_max_physical_id(vcpu, AVIC_MAX_PHYSICAL_ID);
		/*
		 * Flush the TLB, the guest may have inserted a non-APIC
		 * mapping into the TLB while AVIC was disabled.
		 */
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);

		/* Enabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, true);
	}

	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
	vmcb->control.avic_physical_id |= max_id;
  }


I don't think I have a preference between the two?

>  		/* Disabling MSR intercept for x2APIC registers */
>  		svm_set_x2apic_msr_interception(svm, false);
>  	} else {
> @@ -114,7 +125,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
>  		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
>  
>  		/* For xAVIC and hybrid-xAVIC modes */
> -		vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
> +		vmcb->control.avic_physical_id |= avic_get_max_physical_id(svm->vcpu.kvm, false);
>  		/* Enabling MSR intercept for x2APIC registers */
>  		svm_set_x2apic_msr_interception(svm, true);
>  	}
> @@ -174,6 +185,12 @@ int avic_ga_log_notifier(u32 ga_tag)
>  	return 0;
>  }
>  
> +static inline int avic_get_physical_id_table_order(struct kvm *kvm)

Heh, we got there eventually ;-)

> +{
> +	/* Limit to the maximum physical ID supported in x2avic mode */
> +	return get_order((avic_get_max_physical_id(kvm, true) + 1) * sizeof(u64));
> +}
> +
>  void avic_vm_destroy(struct kvm *kvm)
>  {
>  	unsigned long flags;
> @@ -186,7 +203,7 @@ void avic_vm_destroy(struct kvm *kvm)
>  		__free_page(kvm_svm->avic_logical_id_table_page);
>  	if (kvm_svm->avic_physical_id_table_page)
>  		__free_pages(kvm_svm->avic_physical_id_table_page,
> -			     get_order(sizeof(u64) * (x2avic_max_physical_id + 1)));
> +			     avic_get_physical_id_table_order(kvm));
>  
>  	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
>  	hash_del(&kvm_svm->hnode);
> @@ -199,22 +216,12 @@ int avic_vm_init(struct kvm *kvm)
>  	int err = -ENOMEM;
>  	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
>  	struct kvm_svm *k2;
> -	struct page *p_page;
>  	struct page *l_page;
> -	u32 vm_id, entries;
> +	u32 vm_id;
>  
>  	if (!enable_apicv)
>  		return 0;
>  
> -	/* Allocating physical APIC ID table */
> -	entries = x2avic_max_physical_id + 1;
> -	p_page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
> -			     get_order(sizeof(u64) * entries));
> -	if (!p_page)
> -		goto free_avic;
> -
> -	kvm_svm->avic_physical_id_table_page = p_page;
> -
>  	/* Allocating logical APIC ID table (4KB) */
>  	l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
>  	if (!l_page)
> @@ -265,6 +272,24 @@ void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb)
>  		avic_deactivate_vmcb(svm);
>  }
>  
> +int avic_alloc_physical_id_table(struct kvm *kvm)
> +{
> +	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
> +	struct page *p_page;
> +
> +	if (kvm_svm->avic_physical_id_table_page || !enable_apicv || !irqchip_in_kernel(kvm))
> +		return 0;
> +
> +	p_page = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
> +			     avic_get_physical_id_table_order(kvm));
> +	if (!p_page)
> +		return -ENOMEM;
> +
> +	kvm_svm->avic_physical_id_table_page = p_page;
> +
> +	return 0;
> +}
> +
>  static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
>  				       unsigned int index)
>  {
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index b8aa0f36850f..3cb23298cdc3 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1423,6 +1423,11 @@ void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
>  	svm->vmcb = target_vmcb->ptr;
>  }
>  
> +static int svm_vcpu_precreate(struct kvm *kvm)
> +{
> +	return avic_alloc_physical_id_table(kvm);

Why is allocation being moved to svm_vcpu_precreate()?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus
  2025-06-23 23:17   ` Sean Christopherson
@ 2025-07-18 10:21     ` Naveen N Rao
  2025-07-18 14:24       ` Sean Christopherson
  0 siblings, 1 reply; 14+ messages in thread
From: Naveen N Rao @ 2025-07-18 10:21 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: kvm, linux-kernel, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

On Mon, Jun 23, 2025 at 04:17:13PM -0700, Sean Christopherson wrote:
> On Thu, Feb 20, 2025, Naveen N Rao (AMD) wrote:
> > From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> > 
> > Newer AMD platforms enhance x2AVIC feature to support up to 4096 vcpus.
> > This capatility is detected via CPUID_Fn8000000A_ECX[x2AVIC_EXT].
> > 
> > Modify the SVM driver to check the capability. If detected, extend bitmask
> > for guest max physical APIC ID to 0xFFF, increase maximum vcpu index to
> > 4095, and increase the size of the Phyical APIC ID table from 4K to 32K in
> > order to accommodate up to 4096 entries.
> 
> Kinda silly, but please split this into (at least) two patches.  One to introduce
> the variables to replace the macros, and then one to actually implement support
> for 4096 entries.  That makes it a _lot_ easier to review each change (I'm having
> trouble teasing out what's actually changing for 4k support).

Sure, let me re-work the patches.

> 
> The changelog also needs more info.  Unless I'm misreading the diff, only the
> physical table is being expanded?  How does that work?  (I might be able to
> figure it out if I think hard, but I shouldn't have to think that hard).

Right - it is primarily just the physical ID table being expanded to 
allow AVIC hardware to lookup APIC IDs for vCPUs beyond 511.

As an aside, updated APM covering 4k vCPU support (and other updates) 
has now been published:
https://docs.amd.com/v/u/en-US/24593_3.43

> > @@ -1218,8 +1224,19 @@ bool avic_hardware_setup(void)
> >  
> >  	/* AVIC is a prerequisite for x2AVIC. */
> >  	x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
> > -	if (x2avic_enabled)
> > -		pr_info("x2AVIC enabled\n");
> > +	if (x2avic_enabled) {
> > +		x2avic_4k_vcpu_supported = !!(cpuid_ecx(0x8000000a) & 0x40);
> 
> No, add an X86_FEATURE_xxx for this, don't open code the CPUID lookup.  I think
> I'd even be tempted to use helpers instead of  

Ack.

> 
> > +		if (x2avic_4k_vcpu_supported) {
> > +			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
> > +			avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_4K_MASK;
> > +		} else {
> > +			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
> > +			avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_MASK;
> > +		}
> > +
> > +		pr_info("x2AVIC enabled%s\n",
> > +			x2avic_4k_vcpu_supported ? " (w/ 4K-vcpu)" : "");
> 
> Maybe print the max number of vCPUs that are supported?  That way there is clear
> signal when 4k *isn't* supported (and communicating the max number of vCPUs in
> the !4k case would be helpful too).

I'm tempted to go the opposite way and not print that 4k vCPUs are 
supported by x2AVIC. As it is, there are many reasons AVIC may be 
inhibited and lack of 4k vCPU support is just one other reason, but only
for large VMs.

Most users shouldn't have to care: where possible, AVIC will be enabled 
by default (once that patch series lands). Users who truly care about 
AVIC will anyway need to confirm AVIC isn't inhibited since looking at 
the kernel log won't be sufficient. Those users can very well use cpuid 
to figure out if 4k vCPU support is present.


Thanks,
Naveen


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids
  2025-06-24  0:38   ` Sean Christopherson
@ 2025-07-18 10:34     ` Naveen N Rao
  2025-07-18 15:17       ` Sean Christopherson
  0 siblings, 1 reply; 14+ messages in thread
From: Naveen N Rao @ 2025-07-18 10:34 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: kvm, linux-kernel, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

On Mon, Jun 23, 2025 at 05:38:23PM -0700, Sean Christopherson wrote:
> On Thu, Feb 20, 2025, Naveen N Rao (AMD) wrote:
> > KVM allows VMMs to specify the maximum possible APIC ID for a virtual
> > machine through KVM_CAP_MAX_VCPU_ID capability so as to limit data
> > structures related to APIC/x2APIC. Utilize the same to set the AVIC
> > physical max index in the VMCB, similar to VMX. This helps hardware
> > limit the number of entries to be scanned in the physical APIC ID table
> > speeding up IPI broadcasts for virtual machines with smaller number of
> > vcpus.
> > 
> > The minimum allocation required for the Physical APIC ID table is one 4k
> > page supporting up to 512 entries. With AVIC support for 4096 vcpus
> > though, it is sufficient to only allocate memory to accommodate the
> > AVIC physical max index that will be programmed into the VMCB. Limit
> > memory allocated for the Physical APIC ID table accordingly.
> 
> Can you flip the order of the patches?  This seems like an easy "win" for
> performance, and so I can see people wanting to backport this to random kernels
> even if they don't care about running 4k vCPUs.
> 
> Speaking of which, is there a measurable performance win?

That was my first thought. But for VMs upto 512 vCPUs, I didn't see much 
of a performance difference with broadcast IPIs at all. But, I guess it 
shouldn't hurt, so I will prep a smaller patch that can go before the 4k 
vCPU support patch.

With 4k vCPU support enabled, yes, this makes a lot of difference. IIRC, 
ipi-bench for broadcast IPIs went from ~10-15 seconds down to 3 seconds.

> 
> > Signed-off-by: Naveen N Rao (AMD) <naveen@kernel.org>
> > ---
> >  arch/x86/kvm/svm/avic.c | 53 ++++++++++++++++++++++++++++++-----------
> >  arch/x86/kvm/svm/svm.c  |  6 +++++
> >  arch/x86/kvm/svm/svm.h  |  1 +
> >  3 files changed, 46 insertions(+), 14 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
> > index 1fb322d2ac18..dac4a6648919 100644
> > --- a/arch/x86/kvm/svm/avic.c
> > +++ b/arch/x86/kvm/svm/avic.c
> > @@ -85,6 +85,17 @@ struct amd_svm_iommu_ir {
> >  	void *data;		/* Storing pointer to struct amd_ir_data */
> >  };
> >  
> > +static inline u32 avic_get_max_physical_id(struct kvm *kvm, bool is_x2apic)
> 
> Formletter incoming...
> 
> Do not use "inline" for functions that are visible only to the local compilation
> unit.  "inline" is just a hint, and modern compilers are smart enough to inline
> functions when appropriate without a hint.
> 
> A longer explanation/rant here: https://lore.kernel.org/all/ZAdfX+S323JVWNZC@google.com

Ack.

> 
> > +{
> > +	u32 avic_max_physical_id = is_x2apic ? x2avic_max_physical_id : AVIC_MAX_PHYSICAL_ID;
> 
> Don't use a super long local variable.  For a helper like this, it's unnecessary,
> e.g. if the reader can't understand what arch_max or max_id is, then spelling it
> out entirely probably won't help them.
> 
> And practically, there's a danger to using long names like this: you're much more
> likely to unintentionally "shadow" a global variable.  Functionally, it won't be
> a problem, but it can create confusion.  E.g. if we ever added a global
> avic_max_physical_id, then this code would get rather confusing.

Sure, makes sense.

> 
> > +
> > +	/*
> > +	 * Assume vcpu_id is the same as APIC ID. Per KVM_CAP_MAX_VCPU_ID, max_vcpu_ids
> > +	 * represents the max APIC ID for this vm, rather than the max vcpus.
> > +	 */
> > +	return min(kvm->arch.max_vcpu_ids - 1, avic_max_physical_id);
> > +}
> > +
> >  static void avic_activate_vmcb(struct vcpu_svm *svm)
> >  {
> >  	struct vmcb *vmcb = svm->vmcb01.ptr;
> > @@ -103,7 +114,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
> >  	 */
> >  	if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
> >  		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
> > -		vmcb->control.avic_physical_id |= x2avic_max_physical_id;
> > +		vmcb->control.avic_physical_id |= avic_get_max_physical_id(svm->vcpu.kvm, true);
> 
> Don't pass hardcoded booleans when it is at all possible to do something else.
> For this case, I would either do:
> 
>   static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu)
>   {
> 	u32 arch_max;
> 	
> 	if (x2avic_enabled && apic_x2apic_mode(vcpu->arch.apic))

This won't work since we want to use this during vCPU init and at that 
point, I don't think we can rely on the vCPU x2APIC mode to decide the 
size of the AVIC physical ID table.

> 		arch_max = x2avic_max_physical_id;
> 	else
> 		arch_max = AVIC_MAX_PHYSICAL_ID;
> 
> 	return min(kvm->arch.max_vcpu_ids - 1, arch_max);
>   }
>

<snip>

> 
>   static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu, u32 arch_max)
>   {
> 	return min(kvm->arch.max_vcpu_ids - 1, arch_max);
>   }
> 
>   static void avic_activate_vmcb(struct vcpu_svm *svm)
>   {
> 	struct vmcb *vmcb = svm->vmcb01.ptr;
> 	struct kvm_vcpu *vcpu = &svm->vcpu;
> 	u32 max_id;
> 
> 	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
> 	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
> 
> 	/*
> 	 * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
> 	 * accesses, while interrupt injection to a running vCPU can be
> 	 * achieved using AVIC doorbell.  KVM disables the APIC access page
> 	 * (deletes the memslot) if any vCPU has x2APIC enabled, thus 
> 	 enabling
> 	 * AVIC in hybrid mode activates only the doorbell mechanism.
> 	 */
> 	if (x2avic_enabled && apic_x2apic_mode(vcpu->arch.apic)) {
> 		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
> 		max_id = avic_get_max_physical_id(vcpu, x2avic_max_physical_id);
> 
> 		/* Disabling MSR intercept for x2APIC registers */
> 		svm_set_x2apic_msr_interception(svm, false);
> 	} else {
> 		max_id = avic_get_max_physical_id(vcpu, 
> 		AVIC_MAX_PHYSICAL_ID);
> 		/*
> 		 * Flush the TLB, the guest may have inserted a non-APIC
> 		 * mapping into the TLB while AVIC was disabled.
> 		 */
> 		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
> 
> 		/* Enabling MSR intercept for x2APIC registers */
> 		svm_set_x2apic_msr_interception(svm, true);
> 	}
> 
> 	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
> 	vmcb->control.avic_physical_id |= max_id;
>   }
> 
> 
> I don't think I have a preference between the two?

I'm thinking of limiting the helper to just x2AVIC mode, since the 
x1AVIC use is limited to a single place. Let me see what I can come up 
with.

> > +static int svm_vcpu_precreate(struct kvm *kvm)
> > +{
> > +	return avic_alloc_physical_id_table(kvm);
> 
> Why is allocation being moved to svm_vcpu_precreate()?

This is because we want KVM_CAP_MAX_VCPU_ID to have been invoked by the 
VMM, and that is guaranteed to be set by the time the first vCPU is 
created. We restrict the AVIC physical ID table based on the maximum 
number of vCPUs set by the VMM.

This mirrors how Intel VMX uses this capability. I will call this out 
explicitly in the commit log.


Thanks,
Naveen


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus
  2025-07-18 10:21     ` Naveen N Rao
@ 2025-07-18 14:24       ` Sean Christopherson
  2025-07-21 14:11         ` Naveen N Rao
  0 siblings, 1 reply; 14+ messages in thread
From: Sean Christopherson @ 2025-07-18 14:24 UTC (permalink / raw)
  To: Naveen N Rao
  Cc: kvm, linux-kernel, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

On Fri, Jul 18, 2025, Naveen N Rao wrote:
> On Mon, Jun 23, 2025 at 04:17:13PM -0700, Sean Christopherson wrote:
> > On Thu, Feb 20, 2025, Naveen N Rao (AMD) wrote:
> > > +		if (x2avic_4k_vcpu_supported) {
> > > +			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
> > > +			avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_4K_MASK;
> > > +		} else {
> > > +			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
> > > +			avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_MASK;
> > > +		}
> > > +
> > > +		pr_info("x2AVIC enabled%s\n",
> > > +			x2avic_4k_vcpu_supported ? " (w/ 4K-vcpu)" : "");
> > 
> > Maybe print the max number of vCPUs that are supported?  That way there is clear
> > signal when 4k *isn't* supported (and communicating the max number of vCPUs in
> > the !4k case would be helpful too).
> 
> I'm tempted to go the opposite way and not print that 4k vCPUs are 
> supported by x2AVIC. As it is, there are many reasons AVIC may be 
> inhibited and lack of 4k vCPU support is just one other reason, but only
> for large VMs.

This isn't just about AVIC being inhibited though, it's about communicating
hardware support to the admin/user.  While I usually advocate *against* using
printk to log information, I find SVM's pr_info()s about what is/isn't enabled
during module load to be extremely useful, e.g. as sanity checks.  I (re)load
kvm-amd.ko on various hardware configurations on a regular basis, and more than
once the prints have helped me "remember" which platforms do/don't have SEV-ES,
AVIC, etc, and/or detect that I loaded kvm-amd.ko with the wrong overrides.

> Most users shouldn't have to care: where possible, AVIC will be enabled 
> by default (once that patch series lands). Users who truly care about 
> AVIC will anyway need to confirm AVIC isn't inhibited since looking at 
> the kernel log won't be sufficient. Those users can very well use cpuid 
> to figure out if 4k vCPU support is present.

If there wasn't already an "x2AVIC enabled" print, I would probably lean toward
doing nothing.  But since pr_info("x2AVIC enabled\n") already exists, and has
plently of free space for adding extra information, there's basically zero downside
to printing out the number of supported CPUs.  And it's not just a binary yes/no,
e.g. I would wager most people couldn't state the number of vCPUs supported by
the "old" x2AVIC.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids
  2025-07-18 10:34     ` Naveen N Rao
@ 2025-07-18 15:17       ` Sean Christopherson
  2025-07-21 14:12         ` Naveen N Rao
  0 siblings, 1 reply; 14+ messages in thread
From: Sean Christopherson @ 2025-07-18 15:17 UTC (permalink / raw)
  To: Naveen N Rao
  Cc: kvm, linux-kernel, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

On Fri, Jul 18, 2025, Naveen N Rao wrote:
> On Mon, Jun 23, 2025 at 05:38:23PM -0700, Sean Christopherson wrote:
> > > @@ -103,7 +114,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
> > >  	 */
> > >  	if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
> > >  		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
> > > -		vmcb->control.avic_physical_id |= x2avic_max_physical_id;
> > > +		vmcb->control.avic_physical_id |= avic_get_max_physical_id(svm->vcpu.kvm, true);
> > 
> > Don't pass hardcoded booleans when it is at all possible to do something else.
> > For this case, I would either do:
> > 
> >   static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu)
> >   {
> > 	u32 arch_max;
> > 	
> > 	if (x2avic_enabled && apic_x2apic_mode(vcpu->arch.apic))
> 
> This won't work since we want to use this during vCPU init and at that 
> point, I don't think we can rely on the vCPU x2APIC mode to decide the 
> size of the AVIC physical ID table.

Ah, I missed that this is used by avic_get_physical_id_table_order().  How about
this?

static u32 __avic_get_max_physical_id(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
	u32 arch_max;

	if (x2avic_enabled && (!vcpu || apic_x2apic_mode(vcpu->arch.apic)))
		arch_max = x2avic_max_physical_id;
	else
		arch_max = AVIC_MAX_PHYSICAL_ID;

	return min(kvm->arch.max_vcpu_ids - 1, arch_max);
}

static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu)
{
	return __avic_get_max_physical_id(vcpu->kvm, vcpu);
}

static void avic_activate_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb01.ptr;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);

	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
	vmcb->control.avic_physical_id |= avic_get_max_physical_id(vcpu);

	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;

	/*
	 * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
	 * accesses, while interrupt injection to a running vCPU can be
	 * achieved using AVIC doorbell.  KVM disables the APIC access page
	 * (deletes the memslot) if any vCPU has x2APIC enabled, thus enabling
	 * AVIC in hybrid mode activates only the doorbell mechanism.
	 */
	if (x2avic_enabled && apic_x2apic_mode(vcpu->arch.apic)) {
		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
		/* Disabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, false);
	} else {
		/*
		 * Flush the TLB, the guest may have inserted a non-APIC
		 * mapping into the TLB while AVIC was disabled.
		 */
		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);

		/* Enabling MSR intercept for x2APIC registers */
		svm_set_x2apic_msr_interception(svm, true);
	}
}

static int avic_get_physical_id_table_order(struct kvm *kvm)
{
	/* Limit to the maximum physical ID supported in x2avic mode */
	return get_order((__avic_get_max_physical_id(kvm, NULL) + 1) * sizeof(u64));
}

> > > +static int svm_vcpu_precreate(struct kvm *kvm)
> > > +{
> > > +	return avic_alloc_physical_id_table(kvm);
> > 
> > Why is allocation being moved to svm_vcpu_precreate()?
> 
> This is because we want KVM_CAP_MAX_VCPU_ID to have been invoked by the 
> VMM, and that is guaranteed to be set by the time the first vCPU is 
> created. We restrict the AVIC physical ID table based on the maximum 
> number of vCPUs set by the VMM.
> 
> This mirrors how Intel VMX uses this capability. I will call this out 
> explicitly in the commit log.

Do it as a separate patch.  Then you pretty much *have* to write a changelog,
and the changes that are specific to 4k support are even more isolated.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus
  2025-07-18 14:24       ` Sean Christopherson
@ 2025-07-21 14:11         ` Naveen N Rao
  2025-07-21 22:26           ` Sean Christopherson
  0 siblings, 1 reply; 14+ messages in thread
From: Naveen N Rao @ 2025-07-21 14:11 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: kvm, linux-kernel, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

On Fri, Jul 18, 2025 at 07:24:15AM -0700, Sean Christopherson wrote:
> On Fri, Jul 18, 2025, Naveen N Rao wrote:
> > On Mon, Jun 23, 2025 at 04:17:13PM -0700, Sean Christopherson wrote:
> > > On Thu, Feb 20, 2025, Naveen N Rao (AMD) wrote:
> > > > +		if (x2avic_4k_vcpu_supported) {
> > > > +			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
> > > > +			avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_4K_MASK;
> > > > +		} else {
> > > > +			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
> > > > +			avic_physical_max_index_mask = AVIC_PHYSICAL_MAX_INDEX_MASK;
> > > > +		}
> > > > +
> > > > +		pr_info("x2AVIC enabled%s\n",
> > > > +			x2avic_4k_vcpu_supported ? " (w/ 4K-vcpu)" : "");
> > > 
> > > Maybe print the max number of vCPUs that are supported?  That way there is clear
> > > signal when 4k *isn't* supported (and communicating the max number of vCPUs in
> > > the !4k case would be helpful too).
> > 
> > I'm tempted to go the opposite way and not print that 4k vCPUs are 
> > supported by x2AVIC. As it is, there are many reasons AVIC may be 
> > inhibited and lack of 4k vCPU support is just one other reason, but only
> > for large VMs.
> 
> This isn't just about AVIC being inhibited though, it's about communicating
> hardware support to the admin/user.  While I usually advocate *against* using
> printk to log information, I find SVM's pr_info()s about what is/isn't enabled
> during module load to be extremely useful, e.g. as sanity checks.  I (re)load
> kvm-amd.ko on various hardware configurations on a regular basis, and more than
> once the prints have helped me "remember" which platforms do/don't have SEV-ES,
> AVIC, etc, and/or detect that I loaded kvm-amd.ko with the wrong overrides.

Sure, if you are finding it helpful, that's fine.

> 
> > Most users shouldn't have to care: where possible, AVIC will be enabled 
> > by default (once that patch series lands). Users who truly care about 
> > AVIC will anyway need to confirm AVIC isn't inhibited since looking at 
> > the kernel log won't be sufficient. Those users can very well use cpuid 
> > to figure out if 4k vCPU support is present.
> 
> If there wasn't already an "x2AVIC enabled" print, I would probably lean toward
> doing nothing.  But since pr_info("x2AVIC enabled\n") already exists, and has
> plently of free space for adding extra information, there's basically zero downside
> to printing out the number of supported CPUs.  And it's not just a binary yes/no,
> e.g. I would wager most people couldn't state the number of vCPUs supported by
> the "old" x2AVIC.

Ok, this is what I have now. Let me know if you prefer different 
wording:

	/* AVIC is a prerequisite for x2AVIC. */
        x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
-       if (x2avic_enabled)
-               pr_info("x2AVIC enabled\n");
+       if (x2avic_enabled) {
+               if (cpu_feature_enabled(X86_FEATURE_X2AVIC_EXT))
+                       x2avic_max_physical_id = X2AVIC_4K_MAX_PHYSICAL_ID;
+               pr_info("x2AVIC enabled (upto %lld vCPUs)\n", x2avic_max_physical_id + 1);
+       }


- Naveen

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids
  2025-07-18 15:17       ` Sean Christopherson
@ 2025-07-21 14:12         ` Naveen N Rao
  0 siblings, 0 replies; 14+ messages in thread
From: Naveen N Rao @ 2025-07-21 14:12 UTC (permalink / raw)
  To: Sean Christopherson
  Cc: kvm, linux-kernel, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

On Fri, Jul 18, 2025 at 08:17:02AM -0700, Sean Christopherson wrote:
> On Fri, Jul 18, 2025, Naveen N Rao wrote:
> > On Mon, Jun 23, 2025 at 05:38:23PM -0700, Sean Christopherson wrote:
> > > > @@ -103,7 +114,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
> > > >  	 */
> > > >  	if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
> > > >  		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
> > > > -		vmcb->control.avic_physical_id |= x2avic_max_physical_id;
> > > > +		vmcb->control.avic_physical_id |= avic_get_max_physical_id(svm->vcpu.kvm, true);
> > > 
> > > Don't pass hardcoded booleans when it is at all possible to do something else.
> > > For this case, I would either do:
> > > 
> > >   static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu)
> > >   {
> > > 	u32 arch_max;
> > > 	
> > > 	if (x2avic_enabled && apic_x2apic_mode(vcpu->arch.apic))
> > 
> > This won't work since we want to use this during vCPU init and at that 
> > point, I don't think we can rely on the vCPU x2APIC mode to decide the 
> > size of the AVIC physical ID table.
> 
> Ah, I missed that this is used by avic_get_physical_id_table_order().  How about
> this?
> 
> static u32 __avic_get_max_physical_id(struct kvm *kvm, struct kvm_vcpu *vcpu)
> {
> 	u32 arch_max;
> 
> 	if (x2avic_enabled && (!vcpu || apic_x2apic_mode(vcpu->arch.apic)))
> 		arch_max = x2avic_max_physical_id;
> 	else
> 		arch_max = AVIC_MAX_PHYSICAL_ID;
> 
> 	return min(kvm->arch.max_vcpu_ids - 1, arch_max);
> }
> 
> static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu)
> {
> 	return __avic_get_max_physical_id(vcpu->kvm, vcpu);
> }
> 
> static void avic_activate_vmcb(struct vcpu_svm *svm)
> {
> 	struct vmcb *vmcb = svm->vmcb01.ptr;
> 	struct kvm_vcpu *vcpu = &svm->vcpu;
> 
> 	vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);
> 
> 	vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
> 	vmcb->control.avic_physical_id |= avic_get_max_physical_id(vcpu);
> 
> 	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
> 
> 	/*
> 	 * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
> 	 * accesses, while interrupt injection to a running vCPU can be
> 	 * achieved using AVIC doorbell.  KVM disables the APIC access page
> 	 * (deletes the memslot) if any vCPU has x2APIC enabled, thus enabling
> 	 * AVIC in hybrid mode activates only the doorbell mechanism.
> 	 */
> 	if (x2avic_enabled && apic_x2apic_mode(vcpu->arch.apic)) {
> 		vmcb->control.int_ctl |= X2APIC_MODE_MASK;
> 		/* Disabling MSR intercept for x2APIC registers */
> 		svm_set_x2apic_msr_interception(svm, false);
> 	} else {
> 		/*
> 		 * Flush the TLB, the guest may have inserted a non-APIC
> 		 * mapping into the TLB while AVIC was disabled.
> 		 */
> 		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
> 
> 		/* Enabling MSR intercept for x2APIC registers */
> 		svm_set_x2apic_msr_interception(svm, true);
> 	}
> }
> 
> static int avic_get_physical_id_table_order(struct kvm *kvm)
> {
> 	/* Limit to the maximum physical ID supported in x2avic mode */
> 	return get_order((__avic_get_max_physical_id(kvm, NULL) + 1) * sizeof(u64));
> }

This LGTM, I will incorporate this.

> 
> > > > +static int svm_vcpu_precreate(struct kvm *kvm)
> > > > +{
> > > > +	return avic_alloc_physical_id_table(kvm);
> > > 
> > > Why is allocation being moved to svm_vcpu_precreate()?
> > 
> > This is because we want KVM_CAP_MAX_VCPU_ID to have been invoked by the 
> > VMM, and that is guaranteed to be set by the time the first vCPU is 
> > created. We restrict the AVIC physical ID table based on the maximum 
> > number of vCPUs set by the VMM.
> > 
> > This mirrors how Intel VMX uses this capability. I will call this out 
> > explicitly in the commit log.
> 
> Do it as a separate patch.  Then you pretty much *have* to write a changelog,
> and the changes that are specific to 4k support are even more isolated.

Sure.


Thanks,
Naveen


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus
  2025-07-21 14:11         ` Naveen N Rao
@ 2025-07-21 22:26           ` Sean Christopherson
  0 siblings, 0 replies; 14+ messages in thread
From: Sean Christopherson @ 2025-07-21 22:26 UTC (permalink / raw)
  To: Naveen N Rao
  Cc: kvm, linux-kernel, Paolo Bonzini, Suravee Suthikulpanit,
	Vasant Hegde

On Mon, Jul 21, 2025, Naveen N Rao wrote:
> On Fri, Jul 18, 2025 at 07:24:15AM -0700, Sean Christopherson wrote:
> > If there wasn't already an "x2AVIC enabled" print, I would probably lean toward
> > doing nothing.  But since pr_info("x2AVIC enabled\n") already exists, and has
> > plently of free space for adding extra information, there's basically zero downside
> > to printing out the number of supported CPUs.  And it's not just a binary yes/no,
> > e.g. I would wager most people couldn't state the number of vCPUs supported by
> > the "old" x2AVIC.
> 
> Ok, this is what I have now. Let me know if you prefer different 
> wording:
> 
> 	/* AVIC is a prerequisite for x2AVIC. */
>         x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
> -       if (x2avic_enabled)
> -               pr_info("x2AVIC enabled\n");
> +       if (x2avic_enabled) {
> +               if (cpu_feature_enabled(X86_FEATURE_X2AVIC_EXT))
> +                       x2avic_max_physical_id = X2AVIC_4K_MAX_PHYSICAL_ID;

I actually like the approach you initially posted, where KVM explicitly sets
x2avic_max_physical_id for both paths.  KVM could obviously rely on global
initialization to set X2AVIC_MAX_PHYSICAL_ID, but it's not like this code is
performance critical, and have all paths in one place makes it easy to understand
what the "default" value is.

		if (cpu_feature_enabled(X86_FEATURE_X2AVIC_EXT))
			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID_4K;
		else
			x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;

> +               pr_info("x2AVIC enabled (upto %lld vCPUs)\n", x2avic_max_physical_id + 1);

Maybe s/upto/max?

> +       }
> 
> 
> - Naveen

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2025-07-21 22:26 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-20  7:38 [PATCH v3 0/2] KVM: SVM: Add support for 4096 vcpus with x2AVIC Naveen N Rao (AMD)
2025-02-20  7:38 ` [PATCH v3 1/2] KVM: SVM: Increase X2AVIC limit to 4096 vcpus Naveen N Rao (AMD)
2025-06-23 23:17   ` Sean Christopherson
2025-07-18 10:21     ` Naveen N Rao
2025-07-18 14:24       ` Sean Christopherson
2025-07-21 14:11         ` Naveen N Rao
2025-07-21 22:26           ` Sean Christopherson
2025-02-20  7:38 ` [PATCH v3 2/2] KVM: SVM: Limit AVIC physical max index based on configured max_vcpu_ids Naveen N Rao (AMD)
2025-02-20 13:36   ` Gupta, Pankaj
2025-06-24  0:38   ` Sean Christopherson
2025-07-18 10:34     ` Naveen N Rao
2025-07-18 15:17       ` Sean Christopherson
2025-07-21 14:12         ` Naveen N Rao
2025-03-20  5:34 ` [PATCH v3 0/2] KVM: SVM: Add support for 4096 vcpus with x2AVIC Vasant Hegde

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).