public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] KVM: x86: Add PV IPIs support
@ 2018-07-02 10:14 Wanpeng Li
  2018-07-02 10:14 ` [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest Wanpeng Li
  2018-07-02 10:14 ` [PATCH v2 2/2] KVM: X86: Implement PV send IPI support Wanpeng Li
  0 siblings, 2 replies; 8+ messages in thread
From: Wanpeng Li @ 2018-07-02 10:14 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář, Vitaly Kuznetsov

Using a hypercall to send IPIs with a single vmexit, instead of one vmexit
per IPI for xAPIC/x2APIC physical mode and one vmexit per cluster for
x2APIC cluster mode.

Even with qemu interrupt remapping and PV TLB shootdown enabled, I can still
observe a ~14% performance boost with the ebizzy benchmark on a 64 vCPU VM;
the total MSR-induced vmexits are reduced by ~70%.

The patchset implements PV IPIs for VMs with <= 128 vCPUs, which is really
common in cloud environments. After this patchset is applied, I can continue
to add support for VMs with > 128 vCPUs; that implementation has to introduce
more complex logic.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>

v1 -> v2:
 * fall back to the original apic hooks for sparse APIC IDs > 128, or any other errors
 * have two bitmask arguments so that one hypercall handles 128 vCPUs 
 * fix KVM_FEATURE_PV_SEND_IPI doc
 * document hypercall
 * fix NMI selftest fails
 * fix build errors reported by 0day

Wanpeng Li (2):
  KVM: X86: Implement PV IPI in linux guest
  KVM: X86: Implement PV send IPI support

 Documentation/virtual/kvm/cpuid.txt      |  4 ++
 Documentation/virtual/kvm/hypercalls.txt |  6 ++
 arch/x86/include/uapi/asm/kvm_para.h     |  1 +
 arch/x86/kernel/kvm.c                    | 99 ++++++++++++++++++++++++++++++++
 arch/x86/kvm/cpuid.c                     |  3 +-
 arch/x86/kvm/x86.c                       | 42 ++++++++++++++
 include/uapi/linux/kvm_para.h            |  1 +
 7 files changed, 155 insertions(+), 1 deletion(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest
  2018-07-02 10:14 [PATCH v2 0/2] KVM: x86: Add PV IPIs support Wanpeng Li
@ 2018-07-02 10:14 ` Wanpeng Li
  2018-07-02 10:26   ` Vitaly Kuznetsov
                     ` (2 more replies)
  2018-07-02 10:14 ` [PATCH v2 2/2] KVM: X86: Implement PV send IPI support Wanpeng Li
  1 sibling, 3 replies; 8+ messages in thread
From: Wanpeng Li @ 2018-07-02 10:14 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář, Vitaly Kuznetsov

From: Wanpeng Li <wanpengli@tencent.com>

Implement PV IPIs in guest kernel.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 arch/x86/kernel/kvm.c         | 99 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kvm_para.h |  1 +
 2 files changed, 100 insertions(+)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5b2300b..7e3ee25 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -47,6 +47,7 @@
 #include <asm/hypervisor.h>
 #include <asm/kvm_guest.h>
 
+static struct apic orig_apic;
 static int kvmapf = 1;
 
 static int __init parse_no_kvmapf(char *arg)
@@ -454,6 +455,89 @@ static void __init sev_map_percpu_data(void)
 }
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_X86_64
+static bool __send_ipi_mask(const struct cpumask *mask, int vector)
+{
+	unsigned long flags, ipi_bitmap_low = 0, ipi_bitmap_high = 0, icr = 0;
+	int cpu, apic_id, ret = 1;
+
+	if (cpumask_empty(mask))
+		return true;
+
+	local_irq_save(flags);
+
+	for_each_cpu(cpu, mask) {
+		apic_id = per_cpu(x86_cpu_to_apicid, cpu);
+		if (apic_id < BITS_PER_LONG)
+			__set_bit(apic_id, &ipi_bitmap_low);
+		else if (apic_id < 2 * BITS_PER_LONG)
+			__set_bit(apic_id - BITS_PER_LONG, &ipi_bitmap_high);
+		else
+			goto ipi_mask_done;
+	}
+
+	switch (vector) {
+	default:
+		icr = APIC_DM_FIXED | vector;
+		break;
+	case NMI_VECTOR:
+		icr = APIC_DM_NMI;
+		break;
+	}
+
+	ret = kvm_hypercall3(KVM_HC_SEND_IPI, ipi_bitmap_low, ipi_bitmap_high, icr);
+
+ipi_mask_done:
+	local_irq_restore(flags);
+	return ((ret == 0) ? true : false);
+}
+
+static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
+{
+	if (!__send_ipi_mask(mask, vector))
+		orig_apic.send_IPI_mask(mask, vector);
+}
+
+static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	struct cpumask new_mask;
+	const struct cpumask *local_mask;
+
+	cpumask_copy(&new_mask, mask);
+	cpumask_clear_cpu(this_cpu, &new_mask);
+	local_mask = &new_mask;
+	if (!__send_ipi_mask(local_mask, vector))
+		orig_apic.send_IPI_mask_allbutself(mask, vector);
+}
+
+static void kvm_send_ipi_allbutself(int vector)
+{
+	kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
+}
+
+static void kvm_send_ipi_all(int vector)
+{
+	if (!__send_ipi_mask(cpu_online_mask, vector))
+		orig_apic.send_IPI_all(vector);
+}
+
+/*
+ * Set the IPI entry points
+ */
+static void kvm_setup_pv_ipi(void)
+{
+	orig_apic = *apic;
+
+	apic->send_IPI_mask = kvm_send_ipi_mask;
+	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
+	apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
+	apic->send_IPI_all = kvm_send_ipi_all;
+	pr_info("KVM setup pv IPIs\n");
+}
+#endif
+
 static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
 {
 	native_smp_prepare_cpus(max_cpus);
@@ -624,12 +708,27 @@ static uint32_t __init kvm_detect(void)
 	return kvm_cpuid_base();
 }
 
+static void __init kvm_apic_init(void)
+{
+#if defined(CONFIG_SMP) && defined(CONFIG_X86_64)
+	if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
+		num_possible_cpus() <= 2 * BITS_PER_LONG)
+		kvm_setup_pv_ipi();
+#endif
+}
+
+static void __init kvm_init_platform(void)
+{
+	x86_platform.apic_post_init = kvm_apic_init;
+}
+
 const __initconst struct hypervisor_x86 x86_hyper_kvm = {
 	.name			= "KVM",
 	.detect			= kvm_detect,
 	.type			= X86_HYPER_KVM,
 	.init.guest_late_init	= kvm_guest_init,
 	.init.x2apic_available	= kvm_para_available,
+	.init.init_platform	= kvm_init_platform,
 };
 
 static __init int activate_jump_labels(void)
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index dcf629d..7395f38 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -26,6 +26,7 @@
 #define KVM_HC_MIPS_EXIT_VM		7
 #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
 #define KVM_HC_CLOCK_PAIRING		9
+#define KVM_HC_SEND_IPI			10
 
 /*
  * hypercalls use architecture specific
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 2/2] KVM: X86: Implement PV send IPI support
  2018-07-02 10:14 [PATCH v2 0/2] KVM: x86: Add PV IPIs support Wanpeng Li
  2018-07-02 10:14 ` [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest Wanpeng Li
@ 2018-07-02 10:14 ` Wanpeng Li
  1 sibling, 0 replies; 8+ messages in thread
From: Wanpeng Li @ 2018-07-02 10:14 UTC (permalink / raw)
  To: linux-kernel, kvm
  Cc: Paolo Bonzini, Radim Krčmář, Vitaly Kuznetsov

From: Wanpeng Li <wanpengli@tencent.com>

Using a hypercall to send IPIs with a single vmexit, instead of one vmexit
per IPI for xAPIC/x2APIC physical mode and one vmexit per cluster for
x2APIC cluster mode.

Even with qemu interrupt remapping and PV TLB shootdown enabled, I can still
observe a ~14% performance boost with the ebizzy benchmark on a 64 vCPU VM;
the total MSR-induced vmexits are reduced by ~70%.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
---
 Documentation/virtual/kvm/cpuid.txt      |  4 +++
 Documentation/virtual/kvm/hypercalls.txt |  6 +++++
 arch/x86/include/uapi/asm/kvm_para.h     |  1 +
 arch/x86/kvm/cpuid.c                     |  3 ++-
 arch/x86/kvm/x86.c                       | 42 ++++++++++++++++++++++++++++++++
 5 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
index ab022dc..97ca194 100644
--- a/Documentation/virtual/kvm/cpuid.txt
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
                                    ||       || can be enabled by setting bit 2
                                    ||       || when writing to msr 0x4b564d02
 ------------------------------------------------------------------------------
+KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
+                                   ||       || before using paravirtualized
+                                   ||       || send IPIs.
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index a890529..a771ee8 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -121,3 +121,9 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
 
 Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
 or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
+
+6. KVM_HC_SEND_IPI
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to send IPIs.
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 0ede697..19980ec 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -28,6 +28,7 @@
 #define KVM_FEATURE_PV_UNHALT		7
 #define KVM_FEATURE_PV_TLB_FLUSH	9
 #define KVM_FEATURE_ASYNC_PF_VMEXIT	10
+#define KVM_FEATURE_PV_SEND_IPI	11
 
 #define KVM_HINTS_REALTIME      0
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7e042e3..7bcfa61 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
 			     (1 << KVM_FEATURE_PV_UNHALT) |
 			     (1 << KVM_FEATURE_PV_TLB_FLUSH) |
-			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
+			     (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
+			     (1 << KVM_FEATURE_PV_SEND_IPI);
 
 		if (sched_info_on())
 			entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0046aa7..c2cef21 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6689,6 +6689,45 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
 	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
+/*
+ * Return 0 if successfully added and 1 if discarded.
+ */
+static int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+			unsigned long ipi_bitmap_high, unsigned long icr)
+{
+	int i;
+	struct kvm_apic_map *map;
+	struct kvm_vcpu *vcpu;
+	struct kvm_lapic_irq irq = {0};
+
+	switch (icr & APIC_VECTOR_MASK) {
+	default:
+		irq.vector = icr & APIC_VECTOR_MASK;
+		break;
+	case NMI_VECTOR:
+		break;
+	}
+	irq.delivery_mode = icr & APIC_MODE_MASK;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
+		vcpu = map->phys_map[i]->vcpu;
+		if (!kvm_apic_set_irq(vcpu, &irq, NULL))
+			return 1;
+	}
+
+	for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
+		vcpu = map->phys_map[i + BITS_PER_LONG]->vcpu;
+		if (!kvm_apic_set_irq(vcpu, &irq, NULL))
+			return 1;
+	}
+
+	rcu_read_unlock();
+	return 0;
+}
+
 void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.apicv_active = false;
@@ -6737,6 +6776,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_CLOCK_PAIRING:
 		ret = kvm_pv_clock_pairing(vcpu, a0, a1);
 		break;
+	case KVM_HC_SEND_IPI:
+		ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2);
+		break;
 #endif
 	default:
 		ret = -KVM_ENOSYS;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest
  2018-07-02 10:14 ` [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest Wanpeng Li
@ 2018-07-02 10:26   ` Vitaly Kuznetsov
  2018-07-02 11:32     ` Wanpeng Li
  2018-07-02 11:32   ` Paolo Bonzini
  2018-07-02 13:33   ` kbuild test robot
  2 siblings, 1 reply; 8+ messages in thread
From: Vitaly Kuznetsov @ 2018-07-02 10:26 UTC (permalink / raw)
  To: Wanpeng Li; +Cc: linux-kernel, kvm, Paolo Bonzini, Radim Krčmář

Wanpeng Li <kernellwp@gmail.com> writes:

> From: Wanpeng Li <wanpengli@tencent.com>
>
> Implement PV IPIs in guest kernel.
>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Radim Krčmář <rkrcmar@redhat.com>
> Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> ---
>  arch/x86/kernel/kvm.c         | 99 +++++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/kvm_para.h |  1 +
>  2 files changed, 100 insertions(+)
>
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index 5b2300b..7e3ee25 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -47,6 +47,7 @@
>  #include <asm/hypervisor.h>
>  #include <asm/kvm_guest.h>
>
> +static struct apic orig_apic;
>  static int kvmapf = 1;
>
>  static int __init parse_no_kvmapf(char *arg)
> @@ -454,6 +455,89 @@ static void __init sev_map_percpu_data(void)
>  }
>
>  #ifdef CONFIG_SMP
> +
> +#ifdef CONFIG_X86_64
> +static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> +{
> +	unsigned long flags, ipi_bitmap_low = 0, ipi_bitmap_high = 0, icr = 0;
> +	int cpu, apic_id, ret = 1;
> +
> +	if (cpumask_empty(mask))
> +		return true;
> +
> +	local_irq_save(flags);
> +
> +	for_each_cpu(cpu, mask) {
> +		apic_id = per_cpu(x86_cpu_to_apicid, cpu);
> +		if (apic_id < BITS_PER_LONG)
> +			__set_bit(apic_id, &ipi_bitmap_low);
> +		else if (apic_id < 2 * BITS_PER_LONG)
> +			__set_bit(apic_id - BITS_PER_LONG, &ipi_bitmap_high);
> +		else
> +			goto ipi_mask_done;

Nit:

Both the fact that we don't set 'ret' here and the fact that the label
is named 'ipi_mask_done' -- which sounds like 'all OK' at least to me --
contribute to the feeling that we just skip sending IPIs in some cases.

I would prefer to see something like

else {
   ret = -EFAULT;
   goto irq_restore_exit;
}

> +	}
> +
> +	switch (vector) {
> +	default:
> +		icr = APIC_DM_FIXED | vector;
> +		break;
> +	case NMI_VECTOR:
> +		icr = APIC_DM_NMI;
> +		break;
> +	}
> +
> +	ret = kvm_hypercall3(KVM_HC_SEND_IPI, ipi_bitmap_low, ipi_bitmap_high, icr);
> +
> +ipi_mask_done:
> +	local_irq_restore(flags);
> +	return ((ret == 0) ? true : false);

... and why in the first place do we need to make this function return
'bool' then? Let's just make it return 'int'.

> +}
> +
> +static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
> +{
> +	if (!__send_ipi_mask(mask, vector))
> +		orig_apic.send_IPI_mask(mask, vector);
> +}
> +
> +static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
> +{
> +	unsigned int this_cpu = smp_processor_id();
> +	struct cpumask new_mask;
> +	const struct cpumask *local_mask;
> +
> +	cpumask_copy(&new_mask, mask);
> +	cpumask_clear_cpu(this_cpu, &new_mask);
> +	local_mask = &new_mask;
> +	if (!__send_ipi_mask(local_mask, vector))
> +		orig_apic.send_IPI_mask_allbutself(mask, vector);
> +}
> +
> +static void kvm_send_ipi_allbutself(int vector)
> +{
> +	kvm_send_ipi_mask_allbutself(cpu_online_mask, vector);
> +}
> +
> +static void kvm_send_ipi_all(int vector)
> +{
> +	if (!__send_ipi_mask(cpu_online_mask, vector))
> +		orig_apic.send_IPI_all(vector);
> +}
> +
> +/*
> + * Set the IPI entry points
> + */
> +static void kvm_setup_pv_ipi(void)
> +{
> +	orig_apic = *apic;
> +
> +	apic->send_IPI_mask = kvm_send_ipi_mask;
> +	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
> +	apic->send_IPI_allbutself = kvm_send_ipi_allbutself;
> +	apic->send_IPI_all = kvm_send_ipi_all;
> +	pr_info("KVM setup pv IPIs\n");
> +}
> +#endif
> +
>  static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
>  {
>  	native_smp_prepare_cpus(max_cpus);
> @@ -624,12 +708,27 @@ static uint32_t __init kvm_detect(void)
>  	return kvm_cpuid_base();
>  }
>
> +static void __init kvm_apic_init(void)
> +{
> +#if defined(CONFIG_SMP) && defined(CONFIG_X86_64)
> +	if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
> +		num_possible_cpus() <= 2 * BITS_PER_LONG)
> +		kvm_setup_pv_ipi();
> +#endif
> +}
> +
> +static void __init kvm_init_platform(void)
> +{
> +	x86_platform.apic_post_init = kvm_apic_init;
> +}
> +
>  const __initconst struct hypervisor_x86 x86_hyper_kvm = {
>  	.name			= "KVM",
>  	.detect			= kvm_detect,
>  	.type			= X86_HYPER_KVM,
>  	.init.guest_late_init	= kvm_guest_init,
>  	.init.x2apic_available	= kvm_para_available,
> +	.init.init_platform	= kvm_init_platform,
>  };
>
>  static __init int activate_jump_labels(void)
> diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
> index dcf629d..7395f38 100644
> --- a/include/uapi/linux/kvm_para.h
> +++ b/include/uapi/linux/kvm_para.h
> @@ -26,6 +26,7 @@
>  #define KVM_HC_MIPS_EXIT_VM		7
>  #define KVM_HC_MIPS_CONSOLE_OUTPUT	8
>  #define KVM_HC_CLOCK_PAIRING		9
> +#define KVM_HC_SEND_IPI			10
>
>  /*
>   * hypercalls use architecture specific

-- 
  Vitaly

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest
  2018-07-02 10:26   ` Vitaly Kuznetsov
@ 2018-07-02 11:32     ` Wanpeng Li
  0 siblings, 0 replies; 8+ messages in thread
From: Wanpeng Li @ 2018-07-02 11:32 UTC (permalink / raw)
  To: Vitaly Kuznetsov; +Cc: LKML, kvm, Paolo Bonzini, Radim Krcmar

On Mon, 2 Jul 2018 at 18:26, Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
> Wanpeng Li <kernellwp@gmail.com> writes:
>
> > From: Wanpeng Li <wanpengli@tencent.com>
> >
> > Implement PV IPIs in guest kernel.
> >
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Radim Krčmář <rkrcmar@redhat.com>
> > Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
> > Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
> > ---
> >  arch/x86/kernel/kvm.c         | 99 +++++++++++++++++++++++++++++++++++++++++++
> >  include/uapi/linux/kvm_para.h |  1 +
> >  2 files changed, 100 insertions(+)
> >
> > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > index 5b2300b..7e3ee25 100644
> > --- a/arch/x86/kernel/kvm.c
> > +++ b/arch/x86/kernel/kvm.c
> > @@ -47,6 +47,7 @@
> >  #include <asm/hypervisor.h>
> >  #include <asm/kvm_guest.h>
> >
> > +static struct apic orig_apic;
> >  static int kvmapf = 1;
> >
> >  static int __init parse_no_kvmapf(char *arg)
> > @@ -454,6 +455,89 @@ static void __init sev_map_percpu_data(void)
> >  }
> >
> >  #ifdef CONFIG_SMP
> > +
> > +#ifdef CONFIG_X86_64
> > +static bool __send_ipi_mask(const struct cpumask *mask, int vector)
> > +{
> > +     unsigned long flags, ipi_bitmap_low = 0, ipi_bitmap_high = 0, icr = 0;
> > +     int cpu, apic_id, ret = 1;
> > +
> > +     if (cpumask_empty(mask))
> > +             return true;
> > +
> > +     local_irq_save(flags);
> > +
> > +     for_each_cpu(cpu, mask) {
> > +             apic_id = per_cpu(x86_cpu_to_apicid, cpu);
> > +             if (apic_id < BITS_PER_LONG)
> > +                     __set_bit(apic_id, &ipi_bitmap_low);
> > +             else if (apic_id < 2 * BITS_PER_LONG)
> > +                     __set_bit(apic_id - BITS_PER_LONG, &ipi_bitmap_high);
> > +             else
> > +                     goto ipi_mask_done;
>
> Nit:
>
> Both the fact that we don't set 'ret' here and the fact that the label
> is named 'ipi_mask_done' -- which sounds like 'all OK' at least to me --
> contribute to the feeling that we just skip sending IPIs in some cases.
>
> I would prefer to see something like
>
> else {
>    ret = -EFAULT;
>    goto irq_restore_exit;
> }
>
> > +     }
> > +
> > +     switch (vector) {
> > +     default:
> > +             icr = APIC_DM_FIXED | vector;
> > +             break;
> > +     case NMI_VECTOR:
> > +             icr = APIC_DM_NMI;
> > +             break;
> > +     }
> > +
> > +     ret = kvm_hypercall3(KVM_HC_SEND_IPI, ipi_bitmap_low, ipi_bitmap_high, icr);
> > +
> > +ipi_mask_done:
> > +     local_irq_restore(flags);
> > +     return ((ret == 0) ? true : false);
>
> ... and why in the first place do we need to make this function return
> 'bool' then? Let's just make it return 'int'.

Thanks for the comments, will do in v3. Btw, on my Haswell desktop (i7,
8 HT), there is a 2.5x performance boost for the IPI
microbenchmark (https://lkml.org/lkml/2017/12/19/141). (8 vCPU guest,
x2apic physical mode; I will retest on a Skylake server w/ a 64 vCPU
x2apic cluster mode guest tomorrow):

Before:
 Dry-run:                         0,            1885493 ns
 Self-IPI:                  7071403,           14711151 ns
 Normal IPI:              204453899,          219896346 ns
 Broadcast IPI:                   0,         2213679722 ns
 Broadcast lock:                  0,         2241226307 ns

After:
 Dry-run:                         0,            1752903 ns
 Self-IPI:                  4944737,           10434149 ns
 Normal IPI:              202351280,          220807969 ns
 Broadcast IPI:                   0,          872991742 ns
=> 2.5 times boost
 Broadcast lock:                  0,          879995113 ns

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest
  2018-07-02 10:14 ` [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest Wanpeng Li
  2018-07-02 10:26   ` Vitaly Kuznetsov
@ 2018-07-02 11:32   ` Paolo Bonzini
  2018-07-02 12:08     ` Wanpeng Li
  2018-07-02 13:33   ` kbuild test robot
  2 siblings, 1 reply; 8+ messages in thread
From: Paolo Bonzini @ 2018-07-02 11:32 UTC (permalink / raw)
  To: Wanpeng Li, linux-kernel, kvm
  Cc: Radim Krčmář, Vitaly Kuznetsov

On 02/07/2018 12:14, Wanpeng Li wrote:
> +	unsigned long flags, ipi_bitmap_low = 0, ipi_bitmap_high = 0, icr = 0;
> +	int cpu, apic_id, ret = 1;
> +
> +	if (cpumask_empty(mask))
> +		return true;
> +
> +	local_irq_save(flags);
> +
> +	for_each_cpu(cpu, mask) {
> +		apic_id = per_cpu(x86_cpu_to_apicid, cpu);
> +		if (apic_id < BITS_PER_LONG)
> +			__set_bit(apic_id, &ipi_bitmap_low);
> +		else if (apic_id < 2 * BITS_PER_LONG)
> +			__set_bit(apic_id - BITS_PER_LONG, &ipi_bitmap_high);
> +		else
> +			goto ipi_mask_done;
> +	}

CPU masks are themselves bitmaps made of longs, so you should be able to
avoid the loop here.

> +static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
> +{
> +	if (!__send_ipi_mask(mask, vector))
> +		orig_apic.send_IPI_mask(mask, vector);
> +}
> +
> +static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
> +{
> +	unsigned int this_cpu = smp_processor_id();
> +	struct cpumask new_mask;
> +	const struct cpumask *local_mask;
> +
> +	cpumask_copy(&new_mask, mask);
> +	cpumask_clear_cpu(this_cpu, &new_mask);
> +	local_mask = &new_mask;
> +	if (!__send_ipi_mask(local_mask, vector))
> +		orig_apic.send_IPI_mask_allbutself(mask, vector);
> +}

Likewise, here it should be possible to check the highest bit in the
mask before copying it.

Paolo

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest
  2018-07-02 11:32   ` Paolo Bonzini
@ 2018-07-02 12:08     ` Wanpeng Li
  0 siblings, 0 replies; 8+ messages in thread
From: Wanpeng Li @ 2018-07-02 12:08 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: LKML, kvm, Radim Krcmar, Vitaly Kuznetsov

On Mon, 2 Jul 2018 at 19:32, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 02/07/2018 12:14, Wanpeng Li wrote:
> > +     unsigned long flags, ipi_bitmap_low = 0, ipi_bitmap_high = 0, icr = 0;
> > +     int cpu, apic_id, ret = 1;
> > +
> > +     if (cpumask_empty(mask))
> > +             return true;
> > +
> > +     local_irq_save(flags);
> > +
> > +     for_each_cpu(cpu, mask) {
> > +             apic_id = per_cpu(x86_cpu_to_apicid, cpu);
> > +             if (apic_id < BITS_PER_LONG)
> > +                     __set_bit(apic_id, &ipi_bitmap_low);
> > +             else if (apic_id < 2 * BITS_PER_LONG)
> > +                     __set_bit(apic_id - BITS_PER_LONG, &ipi_bitmap_high);
> > +             else
> > +                     goto ipi_mask_done;
> > +     }
>
> CPU masks are themselves bitmaps made of longs, so you should be able to
> avoid the loop here.

As we discussed offline, the loop is needed since I need to convert the
processor id, which is allocated by the OS, to the apic id.

>
> > +static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
> > +{
> > +     if (!__send_ipi_mask(mask, vector))
> > +             orig_apic.send_IPI_mask(mask, vector);
> > +}
> > +
> > +static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
> > +{
> > +     unsigned int this_cpu = smp_processor_id();
> > +     struct cpumask new_mask;
> > +     const struct cpumask *local_mask;
> > +
> > +     cpumask_copy(&new_mask, mask);
> > +     cpumask_clear_cpu(this_cpu, &new_mask);
> > +     local_mask = &new_mask;
> > +     if (!__send_ipi_mask(local_mask, vector))
> > +             orig_apic.send_IPI_mask_allbutself(mask, vector);
> > +}
>
> Likewise, here it should be possible to check the highest bit in the
> mask before copying it.

__send_ipi_mask() already handles the sparse APIC ID and > 128 APIC ID
scenarios.

Regards,
Wanpeng Li

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest
  2018-07-02 10:14 ` [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest Wanpeng Li
  2018-07-02 10:26   ` Vitaly Kuznetsov
  2018-07-02 11:32   ` Paolo Bonzini
@ 2018-07-02 13:33   ` kbuild test robot
  2 siblings, 0 replies; 8+ messages in thread
From: kbuild test robot @ 2018-07-02 13:33 UTC (permalink / raw)
  To: Wanpeng Li
  Cc: kbuild-all, linux-kernel, kvm, Paolo Bonzini,
	Radim Krčmář, Vitaly Kuznetsov

[-- Attachment #1: Type: text/plain, Size: 1799 bytes --]

Hi Wanpeng,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on kvm/linux-next]
[also build test ERROR on v4.18-rc3 next-20180702]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Wanpeng-Li/KVM-x86-Add-PV-IPIs-support/20180702-193633
base:   https://git.kernel.org/pub/scm/virt/kvm/kvm.git linux-next
config: x86_64-randconfig-x003-201826 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-16) 7.3.0
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

Note: the linux-review/Wanpeng-Li/KVM-x86-Add-PV-IPIs-support/20180702-193633 HEAD 2601a46efa23f54a8f175cd07dae28c88e36a40d builds fine.
      It only hurts bisectibility.

All errors (new ones prefixed by >>):

   arch/x86/kernel/kvm.c: In function 'kvm_apic_init':
>> arch/x86/kernel/kvm.c:714:27: error: 'KVM_FEATURE_PV_SEND_IPI' undeclared (first use in this function); did you mean 'KVM_FEATURE_PV_EOI'?
     if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
                              ^~~~~~~~~~~~~~~~~~~~~~~
                              KVM_FEATURE_PV_EOI
   arch/x86/kernel/kvm.c:714:27: note: each undeclared identifier is reported only once for each function it appears in

vim +714 arch/x86/kernel/kvm.c

   710	
   711	static void __init kvm_apic_init(void)
   712	{
   713	#if defined(CONFIG_SMP) && defined(CONFIG_X86_64)
 > 714		if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI) &&
   715			num_possible_cpus() <= 2 * BITS_PER_LONG)
   716			kvm_setup_pv_ipi();
   717	#endif
   718	}
   719	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 24070 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2018-07-02 13:33 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-07-02 10:14 [PATCH v2 0/2] KVM: x86: Add PV IPIs support Wanpeng Li
2018-07-02 10:14 ` [PATCH v2 1/2] KVM: X86: Implement PV IPI in linux guest Wanpeng Li
2018-07-02 10:26   ` Vitaly Kuznetsov
2018-07-02 11:32     ` Wanpeng Li
2018-07-02 11:32   ` Paolo Bonzini
2018-07-02 12:08     ` Wanpeng Li
2018-07-02 13:33   ` kbuild test robot
2018-07-02 10:14 ` [PATCH v2 2/2] KVM: X86: Implement PV send IPI support Wanpeng Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox