* [PATCH 1/3] KVM: x86: detect SPIV changes under APICv
2014-10-30 14:06 [PATCH 0/3] kvm: APICv register write workaround Radim Krčmář
@ 2014-10-30 14:06 ` Radim Krčmář
2014-10-30 14:06 ` [PATCH 2/3] KVM: x86: detect LVTT " Radim Krčmář
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Radim Krčmář @ 2014-10-30 14:06 UTC (permalink / raw)
To: linux-kernel; +Cc: kvm, Paolo Bonzini, Gleb Natapov, Marcelo Tosatti
APICv traps register writes, so we can't retrieve previous value.
(A bit of blame on Intel.)
This caused a migration bug: LAPIC is enabled, so our restore code
correctly lowers apic_sw_enabled, but doesn't increase it after APICv is
disabled, so we get below zero when freeing it; resulting in this trace:
WARNING: at kernel/jump_label.c:81 __static_key_slow_dec+0xa6/0xb0()
jump label: negative count!
[<ffffffff816bf898>] dump_stack+0x19/0x1b
[<ffffffff8107c6f1>] warn_slowpath_common+0x61/0x80
[<ffffffff8107c76c>] warn_slowpath_fmt+0x5c/0x80
[<ffffffff811931e6>] __static_key_slow_dec+0xa6/0xb0
[<ffffffff81193226>] static_key_slow_dec_deferred+0x16/0x20
[<ffffffffa0637698>] kvm_free_lapic+0x88/0xa0 [kvm]
[<ffffffffa061c63e>] kvm_arch_vcpu_uninit+0x2e/0xe0 [kvm]
[<ffffffffa05ff301>] kvm_vcpu_uninit+0x21/0x40 [kvm]
[<ffffffffa067cec7>] vmx_free_vcpu+0x47/0x70 [kvm_intel]
[<ffffffffa061bc50>] kvm_arch_vcpu_free+0x50/0x60 [kvm]
[<ffffffffa061ca22>] kvm_arch_destroy_vm+0x102/0x260 [kvm]
[<ffffffff810b68fd>] ? synchronize_srcu+0x1d/0x20
[<ffffffffa06030d1>] kvm_put_kvm+0xe1/0x1c0 [kvm]
[<ffffffffa06036f8>] kvm_vcpu_release+0x18/0x20 [kvm]
[<ffffffff81215c62>] __fput+0x102/0x310
[<ffffffff81215f4e>] ____fput+0xe/0x10
[<ffffffff810ab664>] task_work_run+0xb4/0xe0
[<ffffffff81083944>] do_exit+0x304/0xc60
[<ffffffff816c8dfc>] ? _raw_spin_unlock_irq+0x2c/0x50
[<ffffffff810fd22d>] ? trace_hardirqs_on_caller+0xfd/0x1c0
[<ffffffff8108432c>] do_group_exit+0x4c/0xc0
[<ffffffff810843b4>] SyS_exit_group+0x14/0x20
[<ffffffff816d33a9>] system_call_fastpath+0x16/0x1b
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
arch/x86/kvm/lapic.c | 10 ++++++----
arch/x86/kvm/lapic.h | 1 +
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b8345dd..f538b14 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -201,11 +201,13 @@ out:
static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
- u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+ bool enabled = val & APIC_SPIV_APIC_ENABLED;
apic_set_reg(apic, APIC_SPIV, val);
- if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
- if (val & APIC_SPIV_APIC_ENABLED) {
+
+ if (enabled != apic->sw_enabled) {
+ apic->sw_enabled = enabled;
+ if (enabled) {
static_key_slow_dec_deferred(&apic_sw_disabled);
recalculate_apic_map(apic->vcpu->kvm);
} else
@@ -1320,7 +1322,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
static_key_slow_dec_deferred(&apic_hw_disabled);
- if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+ if (!apic->sw_enabled)
static_key_slow_dec_deferred(&apic_sw_disabled);
if (apic->regs)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6a11845..5fcc3d3 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -33,6 +33,7 @@ struct kvm_lapic {
* Note: Only one register, the TPR, is used by the microcode.
*/
void *regs;
+ bool sw_enabled;
gpa_t vapic_addr;
struct gfn_to_hva_cache vapic_cache;
unsigned long pending_events;
--
2.1.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/3] KVM: x86: detect LVTT changes under APICv
2014-10-30 14:06 [PATCH 0/3] kvm: APICv register write workaround Radim Krčmář
2014-10-30 14:06 ` [PATCH 1/3] KVM: x86: detect SPIV changes under APICv Radim Krčmář
@ 2014-10-30 14:06 ` Radim Krčmář
2014-10-30 14:06 ` [PATCH 3/3] KVM: x86: optimize some accesses to LVTT and SPIV Radim Krčmář
2014-10-31 15:06 ` [PATCH 0/3] kvm: APICv register write workaround Paolo Bonzini
3 siblings, 0 replies; 5+ messages in thread
From: Radim Krčmář @ 2014-10-30 14:06 UTC (permalink / raw)
To: linux-kernel; +Cc: kvm, Paolo Bonzini, Gleb Natapov, Marcelo Tosatti
APICv traps register writes, so we can't retrieve previous value and
omit timer cancelation when mode changes.
timer_mode_mask shouldn't be changing as it depends on cpuid.
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
#define assign(a, b) (a == b ? false : (a = b, true))
arch/x86/kvm/lapic.c | 12 ++++++++----
arch/x86/kvm/lapic.h | 1 +
2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index f538b14..d3a3a1c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1205,17 +1205,20 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
- case APIC_LVTT:
- if ((kvm_apic_get_reg(apic, APIC_LVTT) &
- apic->lapic_timer.timer_mode_mask) !=
- (val & apic->lapic_timer.timer_mode_mask))
+ case APIC_LVTT: {
+ u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
+
+ if (apic->lapic_timer.timer_mode != timer_mode) {
+ apic->lapic_timer.timer_mode = timer_mode;
hrtimer_cancel(&apic->lapic_timer.timer);
+ }
if (!kvm_apic_sw_enabled(apic))
val |= APIC_LVT_MASKED;
val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
apic_set_reg(apic, APIC_LVTT, val);
break;
+ }
case APIC_TMICT:
if (apic_lvtt_tscdeadline(apic))
@@ -1449,6 +1452,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
for (i = 0; i < APIC_LVT_NUM; i++)
apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+ apic->lapic_timer.timer_mode = 0;
apic_set_reg(apic, APIC_LVT0,
SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 5fcc3d3..755a954 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -11,6 +11,7 @@
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
+ u32 timer_mode;
u32 timer_mode_mask;
u64 tscdeadline;
atomic_t pending; /* accumulated triggered timers */
--
2.1.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 3/3] KVM: x86: optimize some accesses to LVTT and SPIV
2014-10-30 14:06 [PATCH 0/3] kvm: APICv register write workaround Radim Krčmář
2014-10-30 14:06 ` [PATCH 1/3] KVM: x86: detect SPIV changes under APICv Radim Krčmář
2014-10-30 14:06 ` [PATCH 2/3] KVM: x86: detect LVTT " Radim Krčmář
@ 2014-10-30 14:06 ` Radim Krčmář
2014-10-31 15:06 ` [PATCH 0/3] kvm: APICv register write workaround Paolo Bonzini
3 siblings, 0 replies; 5+ messages in thread
From: Radim Krčmář @ 2014-10-30 14:06 UTC (permalink / raw)
To: linux-kernel; +Cc: kvm, Paolo Bonzini, Gleb Natapov, Marcelo Tosatti
We mirror a subset of these registers in separate variables.
Using them directly should be faster.
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
arch/x86/kvm/lapic.c | 10 +++-------
arch/x86/kvm/lapic.h | 6 +++---
2 files changed, 6 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d3a3a1c..67af5d2 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -239,21 +239,17 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
- return ((kvm_apic_get_reg(apic, APIC_LVTT) &
- apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
+ return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
}
static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
- return ((kvm_apic_get_reg(apic, APIC_LVTT) &
- apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
+ return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
}
static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
- return ((kvm_apic_get_reg(apic, APIC_LVTT) &
- apic->lapic_timer.timer_mode_mask) ==
- APIC_LVT_TIMER_TSCDEADLINE);
+ return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
}
static inline int apic_lvt_nmi_mode(u32 lvt_val)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 755a954..2c56885 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -121,11 +121,11 @@ static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
extern struct static_key_deferred apic_sw_disabled;
-static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic)
+static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
{
if (static_key_false(&apic_sw_disabled.key))
- return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
- return APIC_SPIV_APIC_ENABLED;
+ return apic->sw_enabled;
+ return true;
}
static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
--
2.1.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* Re: [PATCH 0/3] kvm: APICv register write workaround
2014-10-30 14:06 [PATCH 0/3] kvm: APICv register write workaround Radim Krčmář
` (2 preceding siblings ...)
2014-10-30 14:06 ` [PATCH 3/3] KVM: x86: optimize some accesses to LVTT and SPIV Radim Krčmář
@ 2014-10-31 15:06 ` Paolo Bonzini
3 siblings, 0 replies; 5+ messages in thread
From: Paolo Bonzini @ 2014-10-31 15:06 UTC (permalink / raw)
To: Radim Krčmář, linux-kernel
Cc: kvm, Gleb Natapov, Marcelo Tosatti
On 30/10/2014 15:06, Radim Krčmář wrote:
> APICv traps register writes, so we can't retrieve previous value, but
> our code depends on detecting changes.
Applied, thanks.
Paolo
> Apart from disabling APIC register virtualization, we can detect the
> change by using extra memory. One value history is enough, but we still
> don't want to keep it for every APIC register, for performance reasons.
> This leaves us with either a new framework, or exceptions ...
> The latter options fits KVM's path better [1,2].
>
> And when we already mirror a part of registers, optimizing access is
> acceptable [3]. (Squashed to keep bisecters happy.)
>
> ---
> Radim Krčmář (3):
> KVM: x86: detect SPIV changes under APICv
> KVM: x86: detect LVTT changes under APICv
> KVM: x86: optimize some accesses to LVTT and SPIV
>
> arch/x86/kvm/lapic.c | 32 +++++++++++++++++---------------
> arch/x86/kvm/lapic.h | 8 +++++---
> 2 files changed, 22 insertions(+), 18 deletions(-)
>
^ permalink raw reply [flat|nested] 5+ messages in thread