* [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory @ 2012-04-10 13:27 Michael S. Tsirkin 2012-04-10 14:03 ` Avi Kivity 2012-04-15 16:18 ` [PATCHv1 " Michael S. Tsirkin 0 siblings, 2 replies; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-10 13:27 UTC (permalink / raw) To: kvm I took a stub at implementing PV EOI using shared memory. This should reduce the number of exits an interrupt causes as much as by half. A partially complete draft for both host and guest parts is below. The idea is simple: there's a bit, per APIC, in guest memory, that tells the guest that it does not need EOI. We set it before injecting an interrupt and clear before injecting a nested one. Guest tests it using a test and clear operation - this is necessary so that host can detect interrupt nesting - and if set, it can skip the EOI MSR. There's a new MSR to set the address of said register in guest memory. Otherwise not much changes: - Guest EOI is not required - ISR is automatically cleared before injection Some things are incomplete: add feature negotiation options, qemu support for said options. No testing was done beyond compiling the kernel. I would appreciate early feedback. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> -- diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index d854101..8430f41 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; } #endif /* CONFIG_X86_LOCAL_APIC */ +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi); + static inline void ack_APIC_irq(void) { + if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi))) + return; + /* * ack_APIC_irq() actually gets compiled as a single instruction * ... yummie. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e216ba0..0ee1472 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -481,6 +481,12 @@ struct kvm_vcpu_arch { u64 length; u64 status; } osvw; + + struct { + u64 msr_val; + struct gfn_to_hva_cache data; + int vector; + } eoi; }; struct kvm_lpage_info { diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 734c376..e22b9f8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -37,6 +37,8 @@ #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_STEAL_TIME 0x4b564d03 +#define MSR_KVM_EOI_EN 0x4b564d04 +#define MSR_KVM_EOI_ENABLED 0x1 struct kvm_steal_time { __u64 steal; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 11544d8..1b3f9fa 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -89,6 +89,9 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); */ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU(unsigned long, apic_eoi, 0); +EXPORT_EARLY_PER_CPU_SYMBOL(apic_eoi); + /* * Knob to control our willingness to enable the local APIC. * diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b8ba6e4..8b50f3a 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -39,6 +39,7 @@ #include <asm/desc.h> #include <asm/tlbflush.h> #include <asm/idle.h> +#include <asm/apic.h> static int kvmapf = 1; @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void) smp_processor_id()); } + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) | + MSR_KVM_EOI_ENABLED); + if (has_steal_clock) kvm_register_steal_time(); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8584322..9e38e12 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) irq->level, irq->trig_mode); } -static inline int apic_find_highest_isr(struct kvm_lapic *apic) +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val) +{ + + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val, + sizeof(val)); +} + +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val) +{ + + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val, + sizeof(*val)); +} + +static inline bool eoi_enabled(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED); +} + +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu) +{ + u32 val; + if (eoi_get_user(vcpu, &val) < 0) + apic_debug("Can't read EOI MSR value: 0x%llx\n", + (unsigned long long)vcpi->arch.eoi.msr_val); + if (!(val & 0x1)) + vcpu->arch.eoi.vector = -1; + return vcpu->arch.eoi.vector; +} + +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector) +{ + BUG_ON(vcpu->arch.eoi.vector != -1); + if (eoi_put_user(vcpu, 0x1) < 0) { + apic_debug("Can't set EOI MSR value: 0x%llx\n", + (unsigned long long)vcpi->arch.eoi.msr_val); + return; + } + vcpu->arch.eoi.vector = vector; +} + +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu) +{ + int vector; + vector = vcpu->arch.eoi.vector; + if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) { + apic_debug("Can't clear EOI MSR value: 0x%llx\n", + (unsigned long long)vcpi->arch.eoi.msr_val); + return -1; + } + vcpu->arch.eoi.vector = -1; + return vector; +} + +static inline int __apic_find_highest_isr(struct kvm_lapic *apic) { int result; @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) return result; } +static inline int apic_find_highest_isr(struct kvm_lapic *apic) +{ + int vector; + if (eoi_enabled(apic->vcpu)) { + vector = eoi_get_pending_vector(apic->vcpu); + if (vector != -1) + return vector; + } + return __apic_find_highest_isr(apic); +} + static void apic_update_ppr(struct kvm_lapic *apic) { u32 tpr, isrv, ppr, old_ppr; @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) if (vector == -1) return; + if (eoi_enabled(apic->vcpu)) + eoi_clr_pending_vector(apic->vcpu); apic_clear_vector(vector, apic->regs + APIC_ISR); apic_update_ppr(apic); @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) { int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; + bool set_isr = true; if (vector == -1) return -1; - apic_set_vector(vector, apic->regs + APIC_ISR); + if (eoi_enabled(vcpu)) { + /* Anything pending? If yes disable eoi optimization. */ + if (unlikely(apic_find_highest_isr(apic) >= 0)) { + int v = eoi_clr_pending_vector(vcpu); + if (v != -1) + apic_set_vector(v, apic->regs + APIC_ISR); + } else { + eoi_set_pending_vector(vcpu, vector); + set_isr = false; + } + } + + if (set_isr) + apic_set_vector(vector, apic->regs + APIC_ISR); apic_update_ppr(apic); apic_clear_irr(vector, apic); return vector; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4044ce0..4d00a4d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1502,6 +1502,27 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) return 0; } +static int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data) +{ + gpa_t gpa = data & ~MSR_KVM_EOI_ENABLED; + + /* Bit 1 is reserved, Should be zero. */ + if (data & 0x2) + return 1; + + vcpu->arch.eoi.msr_val = data; + vcpu->arch.eoi.vector = -1; + + if (!(data & MSR_KVM_EOI_ENABLED)) { + return 0; + } + + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data, gpa)) + return 1; + + return 0; +} + static void kvmclock_reset(struct kvm_vcpu *vcpu) { if (vcpu->arch.time_page) { @@ -1627,6 +1648,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (kvm_pv_enable_async_pf(vcpu, data)) return 1; break; + case MSR_KVM_EOI_EN: + if (kvm_pv_enable_apic_eoi(vcpu, data)) + return 1; + break; case MSR_KVM_STEAL_TIME: if (unlikely(!sched_info_on())) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 638a4f3..05c1bf9 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -372,7 +372,6 @@ static int virtblk_name_format(char *prefix, int index, char *buf, int buflen) { const int base = 'z' - 'a' + 1; char *begin = buf + strlen(prefix); - char *begin = buf + strlen(prefix); char *end = buf + buflen; char *p; int unit; diff --git a/net/core/dev.c b/net/core/dev.c index 9d713b8..e42529b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2455,6 +2455,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, rc = NET_XMIT_SUCCESS; } else { skb_dst_force(skb); + /* Orphan the skb - required if we might hang on to it + * for indefinite time. */ + if (unlikely(dev->priv_flags & IFF_TX_CAN_STALL)) + skb_orphan(skb); + rc = q->enqueue(skb, q) & NET_XMIT_MASK; if (qdisc_run_begin(q)) { if (unlikely(contended)) { @@ -2517,11 +2522,6 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; - /* Orphan the skb - required if we might hang on to it - * for indefinite time. */ - if (dev->priv_flags & IFF_TX_CAN_STALL) - skb_orphan(skb); - /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 27883d1..644ca53 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -120,14 +120,11 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, /* And release qdisc */ spin_unlock(root_lock); - /* Orphan the skb - required if we might hang on to it - * for indefinite time. */ - if (dev->priv_flags & IFF_TX_CAN_STALL) - skb_orphan(skb); - HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_xmit_frozen_or_stopped(txq)) + if (likely(!netif_xmit_frozen_or_stopped(txq))) ret = dev_hard_start_xmit(skb, dev, txq); + else if (dev->priv_flags & IFF_TX_CAN_STALL) + skb_orphan(skb); HARD_TX_UNLOCK(dev, txq); @@ -695,7 +692,7 @@ static void attach_one_default_qdisc(struct net_device *dev, { struct Qdisc *qdisc = &noqueue_qdisc; - if (dev->tx_queue_len) { + if (dev->tx_queue_len && !(dev->priv_flags & IFF_TX_CAN_STALL)) { qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, TC_H_ROOT); if (!qdisc) { ^ permalink raw reply related [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 13:27 [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory Michael S. Tsirkin @ 2012-04-10 14:03 ` Avi Kivity 2012-04-10 14:26 ` Michael S. Tsirkin 2012-04-15 16:18 ` [PATCHv1 " Michael S. Tsirkin 1 sibling, 1 reply; 32+ messages in thread From: Avi Kivity @ 2012-04-10 14:03 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote: > I took a stub at implementing PV EOI using shared memory. > This should reduce the number of exits an interrupt > causes as much as by half. > > A partially complete draft for both host and guest parts > is below. > > The idea is simple: there's a bit, per APIC, in guest memory, > that tells the guest that it does not need EOI. > We set it before injecting an interrupt and clear > before injecting a nested one. Guest tests it using > a test and clear operation - this is necessary > so that host can detect interrupt nesting - > and if set, it can skip the EOI MSR. > > There's a new MSR to set the address of said register > in guest memory. Otherwise not much changes: > - Guest EOI is not required > - ISR is automatically cleared before injection > > Some things are incomplete: add feature negotiation > options, qemu support for said options. > No testing was done beyond compiling the kernel. > > I would appreciate early feedback. > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > -- > > diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h > index d854101..8430f41 100644 > --- a/arch/x86/include/asm/apic.h > +++ b/arch/x86/include/asm/apic.h > @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; } > > #endif /* CONFIG_X86_LOCAL_APIC */ > > +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi); > + > static inline void ack_APIC_irq(void) > { > + if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi))) > + return; > + While __test_and_clear_bit() is implemented in a single instruction, it's not required to be. Better have the instruction there explicitly. > /* > * ack_APIC_irq() actually gets compiled as a single instruction > * ... yummie. > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index e216ba0..0ee1472 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -481,6 +481,12 @@ struct kvm_vcpu_arch { > u64 length; > u64 status; > } osvw; > + > + struct { > + u64 msr_val; > + struct gfn_to_hva_cache data; > + int vector; > + } eoi; > }; Needs to be cleared on INIT. > > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void) > smp_processor_id()); > } > > + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) | > + MSR_KVM_EOI_ENABLED); > + Clear on kexec. > if (has_steal_clock) > kvm_register_steal_time(); > } > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 8584322..9e38e12 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) > irq->level, irq->trig_mode); > } > > -static inline int apic_find_highest_isr(struct kvm_lapic *apic) > +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val) > +{ > + > + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val, > + sizeof(val)); > +} > + > +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val) > +{ > + > + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val, > + sizeof(*val)); > +} > + > +static inline bool eoi_enabled(struct kvm_vcpu *vcpu) > +{ > + return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED); > +} > + > +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu) > +{ > + u32 val; > + if (eoi_get_user(vcpu, &val) < 0) > + apic_debug("Can't read EOI MSR value: 0x%llx\n", > + (unsigned long long)vcpi->arch.eoi.msr_val); > + if (!(val & 0x1)) > + vcpu->arch.eoi.vector = -1; > + return vcpu->arch.eoi.vector; > +} > + > +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector) > +{ > + BUG_ON(vcpu->arch.eoi.vector != -1); > + if (eoi_put_user(vcpu, 0x1) < 0) { > + apic_debug("Can't set EOI MSR value: 0x%llx\n", > + (unsigned long long)vcpi->arch.eoi.msr_val); > + return; > + } > + vcpu->arch.eoi.vector = vector; > +} > + > +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu) > +{ > + int vector; > + vector = vcpu->arch.eoi.vector; > + if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) { > + apic_debug("Can't clear EOI MSR value: 0x%llx\n", > + (unsigned long long)vcpi->arch.eoi.msr_val); > + return -1; > + } > + vcpu->arch.eoi.vector = -1; > + return vector; > +} > + > +static inline int __apic_find_highest_isr(struct kvm_lapic *apic) > { > int result; > > @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) > return result; > } > > +static inline int apic_find_highest_isr(struct kvm_lapic *apic) > +{ > + int vector; > + if (eoi_enabled(apic->vcpu)) { > + vector = eoi_get_pending_vector(apic->vcpu); > + if (vector != -1) > + return vector; > + } > + return __apic_find_highest_isr(apic); > +} Why aren't you modifying the ISR unconfitionally? > + > static void apic_update_ppr(struct kvm_lapic *apic) > { > u32 tpr, isrv, ppr, old_ppr; > @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) > if (vector == -1) > return; > > + if (eoi_enabled(apic->vcpu)) > + eoi_clr_pending_vector(apic->vcpu); > apic_clear_vector(vector, apic->regs + APIC_ISR); > apic_update_ppr(apic); > > @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) > { > int vector = kvm_apic_has_interrupt(vcpu); > struct kvm_lapic *apic = vcpu->arch.apic; > + bool set_isr = true; > > if (vector == -1) > return -1; > > - apic_set_vector(vector, apic->regs + APIC_ISR); > + if (eoi_enabled(vcpu)) { > + /* Anything pending? If yes disable eoi optimization. */ > + if (unlikely(apic_find_highest_isr(apic) >= 0)) { > + int v = eoi_clr_pending_vector(vcpu); ISR != pending, that's IRR. If ISR vector has lower priority than the new vector, then we don't need to disable eoi avoidance. > + if (v != -1) > + apic_set_vector(v, apic->regs + APIC_ISR); > + } else { > + eoi_set_pending_vector(vcpu, vector); > + set_isr = false; Weird. Just set it normally. Remember that reading the ISR needs to return the correct value. We need to process the avoided EOI before any APIC read/writes, to be sure the guest sees the updated values. Same for IOAPIC, EOI affects remote_irr. That may been we need to sample it after every exit, or perhaps disable the feature for level-triggered interrupts. > + } > + } > + > + if (set_isr) > + apic_set_vector(vector, apic->regs + APIC_ISR); > apic_update_ppr(apic); > apic_clear_irr(vector, apic); > return vector; > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > -- error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 14:03 ` Avi Kivity @ 2012-04-10 14:26 ` Michael S. Tsirkin 2012-04-10 14:33 ` Avi Kivity 2012-04-10 17:59 ` Gleb Natapov 0 siblings, 2 replies; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-10 14:26 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote: > On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote: > > I took a stub at implementing PV EOI using shared memory. > > This should reduce the number of exits an interrupt > > causes as much as by half. > > > > A partially complete draft for both host and guest parts > > is below. > > > > The idea is simple: there's a bit, per APIC, in guest memory, > > that tells the guest that it does not need EOI. > > We set it before injecting an interrupt and clear > > before injecting a nested one. Guest tests it using > > a test and clear operation - this is necessary > > so that host can detect interrupt nesting - > > and if set, it can skip the EOI MSR. > > > > There's a new MSR to set the address of said register > > in guest memory. Otherwise not much changes: > > - Guest EOI is not required > > - ISR is automatically cleared before injection > > > > Some things are incomplete: add feature negotiation > > options, qemu support for said options. > > No testing was done beyond compiling the kernel. > > > > I would appreciate early feedback. > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > -- > > > > diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h > > index d854101..8430f41 100644 > > --- a/arch/x86/include/asm/apic.h > > +++ b/arch/x86/include/asm/apic.h > > @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; } > > > > #endif /* CONFIG_X86_LOCAL_APIC */ > > > > +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi); > > + > > static inline void ack_APIC_irq(void) > > { > > + if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi))) > > + return; > > + > > While __test_and_clear_bit() is implemented in a single instruction, > it's not required to be. Better have the instruction there explicitly. > > > /* > > * ack_APIC_irq() actually gets compiled as a single instruction > > * ... yummie. > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > index e216ba0..0ee1472 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -481,6 +481,12 @@ struct kvm_vcpu_arch { > > u64 length; > > u64 status; > > } osvw; > > + > > + struct { > > + u64 msr_val; > > + struct gfn_to_hva_cache data; > > + int vector; > > + } eoi; > > }; > > Needs to be cleared on INIT. You mean kvm_arch_vcpu_reset? > > > > > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void) > > smp_processor_id()); > > } > > > > + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) | > > + MSR_KVM_EOI_ENABLED); > > + > > Clear on kexec. With register_reboot_notifier? > > if (has_steal_clock) > > kvm_register_steal_time(); > > } > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > index 8584322..9e38e12 100644 > > --- a/arch/x86/kvm/lapic.c > > +++ b/arch/x86/kvm/lapic.c > > @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) > > irq->level, irq->trig_mode); > > } > > > > -static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val) > > +{ > > + > > + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val, > > + sizeof(val)); > > +} > > + > > +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val) > > +{ > > + > > + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val, > > + sizeof(*val)); > > +} > > + > > +static inline bool eoi_enabled(struct kvm_vcpu *vcpu) > > +{ > > + return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED); > > +} > > + > > +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu) > > +{ > > + u32 val; > > + if (eoi_get_user(vcpu, &val) < 0) > > + apic_debug("Can't read EOI MSR value: 0x%llx\n", > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > + if (!(val & 0x1)) > > + vcpu->arch.eoi.vector = -1; > > + return vcpu->arch.eoi.vector; > > +} > > + > > +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector) > > +{ > > + BUG_ON(vcpu->arch.eoi.vector != -1); > > + if (eoi_put_user(vcpu, 0x1) < 0) { > > + apic_debug("Can't set EOI MSR value: 0x%llx\n", > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > + return; > > + } > > + vcpu->arch.eoi.vector = vector; > > +} > > + > > +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu) > > +{ > > + int vector; > > + vector = vcpu->arch.eoi.vector; > > + if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) { > > + apic_debug("Can't clear EOI MSR value: 0x%llx\n", > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > + return -1; > > + } > > + vcpu->arch.eoi.vector = -1; > > + return vector; > > +} > > > > > + > > +static inline int __apic_find_highest_isr(struct kvm_lapic *apic) > > { > > int result; > > > > @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > return result; > > } > > > > +static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > +{ > > + int vector; > > + if (eoi_enabled(apic->vcpu)) { > > + vector = eoi_get_pending_vector(apic->vcpu); > > + if (vector != -1) > > + return vector; > > + } > > + return __apic_find_highest_isr(apic); > > +} > > Why aren't you modifying the ISR unconfitionally? ISR is not set if there won't be an EOI since it's EOI that normally clears it. > > + > > static void apic_update_ppr(struct kvm_lapic *apic) > > { > > u32 tpr, isrv, ppr, old_ppr; > > @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) > > if (vector == -1) > > return; > > > > + if (eoi_enabled(apic->vcpu)) > > + eoi_clr_pending_vector(apic->vcpu); > > apic_clear_vector(vector, apic->regs + APIC_ISR); > > apic_update_ppr(apic); > > > > @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) > > { > > int vector = kvm_apic_has_interrupt(vcpu); > > struct kvm_lapic *apic = vcpu->arch.apic; > > + bool set_isr = true; > > > > if (vector == -1) > > return -1; > > > > - apic_set_vector(vector, apic->regs + APIC_ISR); > > + if (eoi_enabled(vcpu)) { > > + /* Anything pending? If yes disable eoi optimization. */ > > + if (unlikely(apic_find_highest_isr(apic) >= 0)) { > > + int v = eoi_clr_pending_vector(vcpu); > > ISR != pending, that's IRR. If ISR vector has lower priority than the > new vector, then we don't need to disable eoi avoidance. Yes. But we can and it's easier than figuring out priorities. I am guessing such collisions are rare, right? I'll add a trace to make sure. > > + if (v != -1) > > + apic_set_vector(v, apic->regs + APIC_ISR); > > + } else { > > + eoi_set_pending_vector(vcpu, vector); > > + set_isr = false; > > Weird. Just set it normally. Remember that reading the ISR needs to > return the correct value. Marcelo said linux does not normally read ISR - not true? Note this has no effect if the PV optimization is not enabled. > We need to process the avoided EOI before any APIC read/writes, to be > sure the guest sees the updated values. Same for IOAPIC, EOI affects > remote_irr. That may been we need to sample it after every exit, or > perhaps disable the feature for level-triggered interrupts. Disabling would be very sad. Can we sample on remote irr read? > > + } > > + } > > + > > + if (set_isr) > > + apic_set_vector(vector, apic->regs + APIC_ISR); > > apic_update_ppr(apic); > > apic_clear_irr(vector, apic); > > return vector; > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > > -- > error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 14:26 ` Michael S. Tsirkin @ 2012-04-10 14:33 ` Avi Kivity 2012-04-10 14:53 ` Michael S. Tsirkin 2012-04-10 17:59 ` Gleb Natapov 1 sibling, 1 reply; 32+ messages in thread From: Avi Kivity @ 2012-04-10 14:33 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On 04/10/2012 05:26 PM, Michael S. Tsirkin wrote: > > > u64 status; > > > } osvw; > > > + > > > + struct { > > > + u64 msr_val; > > > + struct gfn_to_hva_cache data; > > > + int vector; > > > + } eoi; > > > }; > > > > Needs to be cleared on INIT. > > You mean kvm_arch_vcpu_reset? Yes, or kvm_lapic_reset(). > > > > > > > > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void) > > > smp_processor_id()); > > > } > > > > > > + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) | > > > + MSR_KVM_EOI_ENABLED); > > > + > > > > Clear on kexec. > > With register_reboot_notifier? Yes, we already clear some kvm msrs there. > > > > > > - apic_set_vector(vector, apic->regs + APIC_ISR); > > > + if (eoi_enabled(vcpu)) { > > > + /* Anything pending? If yes disable eoi optimization. */ > > > + if (unlikely(apic_find_highest_isr(apic) >= 0)) { > > > + int v = eoi_clr_pending_vector(vcpu); > > > > ISR != pending, that's IRR. If ISR vector has lower priority than the > > new vector, then we don't need to disable eoi avoidance. > > Yes. But we can and it's easier than figuring out priorities. > I am guessing such collisions are rare, right? It's pretty easy, if there is something in IRR but kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance. > I'll add a trace to make sure. > > > > + if (v != -1) > > > + apic_set_vector(v, apic->regs + APIC_ISR); > > > + } else { > > > + eoi_set_pending_vector(vcpu, vector); > > > + set_isr = false; > > > > Weird. Just set it normally. Remember that reading the ISR needs to > > return the correct value. > > Marcelo said linux does not normally read ISR - not true? It's true and it's irrelevant. We aren't coding a feature to what linux does now, but for what linux or another guest may do in the future. > Note this has no effect if the PV optimization is not enabled. > > > We need to process the avoided EOI before any APIC read/writes, to be > > sure the guest sees the updated values. Same for IOAPIC, EOI affects > > remote_irr. That may been we need to sample it after every exit, or > > perhaps disable the feature for level-triggered interrupts. > > Disabling would be very sad. Can we sample on remote irr read? That can be done from another vcpu. Why do we care about level-triggered interrupts? Everything uses MSI or edge-triggered IOAPIC interrupts these days. -- error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 14:33 ` Avi Kivity @ 2012-04-10 14:53 ` Michael S. Tsirkin 2012-04-10 15:00 ` Avi Kivity 0 siblings, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-10 14:53 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm On Tue, Apr 10, 2012 at 05:33:18PM +0300, Avi Kivity wrote: > On 04/10/2012 05:26 PM, Michael S. Tsirkin wrote: > > > > u64 status; > > > > } osvw; > > > > + > > > > + struct { > > > > + u64 msr_val; > > > > + struct gfn_to_hva_cache data; > > > > + int vector; > > > > + } eoi; > > > > }; > > > > > > Needs to be cleared on INIT. > > > > You mean kvm_arch_vcpu_reset? > > Yes, or kvm_lapic_reset(). > > > > > > > > > > > > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void) > > > > smp_processor_id()); > > > > } > > > > > > > > + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) | > > > > + MSR_KVM_EOI_ENABLED); > > > > + > > > > > > Clear on kexec. > > > > With register_reboot_notifier? > > Yes, we already clear some kvm msrs there. > > > > > > > > > - apic_set_vector(vector, apic->regs + APIC_ISR); > > > > + if (eoi_enabled(vcpu)) { > > > > + /* Anything pending? If yes disable eoi optimization. */ > > > > + if (unlikely(apic_find_highest_isr(apic) >= 0)) { > > > > + int v = eoi_clr_pending_vector(vcpu); > > > > > > ISR != pending, that's IRR. If ISR vector has lower priority than the > > > new vector, then we don't need to disable eoi avoidance. > > > > Yes. But we can and it's easier than figuring out priorities. > > I am guessing such collisions are rare, right? > > It's pretty easy, if there is something in IRR but > kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance. I only see kvm_apic_has_interrupt - is this what you mean? > > I'll add a trace to make sure. > > > > > > + if (v != -1) > > > > + apic_set_vector(v, apic->regs + APIC_ISR); > > > > + } else { > > > > + eoi_set_pending_vector(vcpu, vector); > > > > + set_isr = false; > > > > > > Weird. Just set it normally. Remember that reading the ISR needs to > > > return the correct value. > > > > Marcelo said linux does not normally read ISR - not true? > > It's true and it's irrelevant. We aren't coding a feature to what linux > does now, but for what linux or another guest may do in the future. Right. So you think reading ISR has value in combination with PV EOI for future guests? I'm not arguing either way just curious. > > Note this has no effect if the PV optimization is not enabled. > > > > > We need to process the avoided EOI before any APIC read/writes, to be > > > sure the guest sees the updated values. Same for IOAPIC, EOI affects > > > remote_irr. That may been we need to sample it after every exit, or > > > perhaps disable the feature for level-triggered interrupts. > > > > Disabling would be very sad. Can we sample on remote irr read? > > That can be done from another vcpu. We still can handle it, right? Where's the code that handles that read? > Why do we care about > level-triggered interrupts? Everything uses MSI or edge-triggered > IOAPIC interrupts these days. Well lots of emulated devices don't yet. They probably should but it's nice to be able to test with e.g. e1000 emulation not just virtio. Besides, kvm_get_apic_interrupt simply doesn't know about the triggering mode at the moment. -- MST ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 14:53 ` Michael S. Tsirkin @ 2012-04-10 15:00 ` Avi Kivity 2012-04-10 15:14 ` Michael S. Tsirkin 0 siblings, 1 reply; 32+ messages in thread From: Avi Kivity @ 2012-04-10 15:00 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote: > > > > > > Yes. But we can and it's easier than figuring out priorities. > > > I am guessing such collisions are rare, right? > > > > It's pretty easy, if there is something in IRR but > > kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance. > > I only see kvm_apic_has_interrupt - is this what you mean? Yes, sorry. It's not clear whether to do the check in kvm_apic_has_interrupt() or kvm_apic_get_interrupt() - the latter is called only after interrupts are enabled, so it looks like a better place (EOIs while interrupts are disabled have no effect). But need to make sure those functions are actually called, since they're protected by KVM_REQ_EVENT. > > > I'll add a trace to make sure. > > > > > > > > + if (v != -1) > > > > > + apic_set_vector(v, apic->regs + APIC_ISR); > > > > > + } else { > > > > > + eoi_set_pending_vector(vcpu, vector); > > > > > + set_isr = false; > > > > > > > > Weird. Just set it normally. Remember that reading the ISR needs to > > > > return the correct value. > > > > > > Marcelo said linux does not normally read ISR - not true? > > > > It's true and it's irrelevant. We aren't coding a feature to what linux > > does now, but for what linux or another guest may do in the future. > > Right. So you think reading ISR has value > in combination with PV EOI for future guests? > I'm not arguing either way just curious. I don't. But we need to preserve the same interface the APIC has presented for thousands of years (well, almost). > > > > Note this has no effect if the PV optimization is not enabled. > > > > > > > We need to process the avoided EOI before any APIC read/writes, to be > > > > sure the guest sees the updated values. Same for IOAPIC, EOI affects > > > > remote_irr. That may been we need to sample it after every exit, or > > > > perhaps disable the feature for level-triggered interrupts. > > > > > > Disabling would be very sad. Can we sample on remote irr read? > > > > That can be done from another vcpu. > > We still can handle it, right? Where's the code that handles that read? Better to keep everything per-cpu. The code is in virt/kvm/ioapic.c > > > Why do we care about > > level-triggered interrupts? Everything uses MSI or edge-triggered > > IOAPIC interrupts these days. > > Well lots of emulated devices don't yet. > They probably should but it's nice to be able to > test with e.g. e1000 emulation not just virtio. e1000 doesn't support msi? > > Besides, kvm_get_apic_interrupt > simply doesn't know about the triggering mode at the moment. > -- error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 15:00 ` Avi Kivity @ 2012-04-10 15:14 ` Michael S. Tsirkin 2012-04-10 16:08 ` Avi Kivity 0 siblings, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-10 15:14 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm On Tue, Apr 10, 2012 at 06:00:51PM +0300, Avi Kivity wrote: > On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote: > > > > > > > > Yes. But we can and it's easier than figuring out priorities. > > > > I am guessing such collisions are rare, right? > > > > > > It's pretty easy, if there is something in IRR but > > > kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance. > > > > I only see kvm_apic_has_interrupt - is this what you mean? > > Yes, sorry. > > It's not clear whether to do the check in kvm_apic_has_interrupt() or > kvm_apic_get_interrupt() - the latter is called only after interrupts > are enabled, so it looks like a better place (EOIs while interrupts are > disabled have no effect). But need to make sure those functions are > actually called, since they're protected by KVM_REQ_EVENT. Sorry not sure what you mean by "make sure" - read the code carefully? > > > > I'll add a trace to make sure. > > > > > > > > > > + if (v != -1) > > > > > > + apic_set_vector(v, apic->regs + APIC_ISR); > > > > > > + } else { > > > > > > + eoi_set_pending_vector(vcpu, vector); > > > > > > + set_isr = false; > > > > > > > > > > Weird. Just set it normally. Remember that reading the ISR needs to > > > > > return the correct value. > > > > > > > > Marcelo said linux does not normally read ISR - not true? > > > > > > It's true and it's irrelevant. We aren't coding a feature to what linux > > > does now, but for what linux or another guest may do in the future. > > > > Right. So you think reading ISR has value > > in combination with PV EOI for future guests? > > I'm not arguing either way just curious. > > I don't. But we need to preserve the same interface the APIC has > presented for thousands of years (well, almost). Talk about overstatements :) > > > > > > Note this has no effect if the PV optimization is not enabled. > > > > > > > > > We need to process the avoided EOI before any APIC read/writes, to be > > > > > sure the guest sees the updated values. Same for IOAPIC, EOI affects > > > > > remote_irr. That may been we need to sample it after every exit, or > > > > > perhaps disable the feature for level-triggered interrupts. > > > > > > > > Disabling would be very sad. Can we sample on remote irr read? > > > > > > That can be done from another vcpu. > > > > We still can handle it, right? Where's the code that handles that read? > > Better to keep everything per-cpu. The code is in virt/kvm/ioapic.c Hmm. Disabling for level handles the ack notifiers issue as well, which I forgot about. It's a tough call. You think looking at TMR in kvm_get_apic_interrupt is safe? > > > > > Why do we care about > > > level-triggered interrupts? Everything uses MSI or edge-triggered > > > IOAPIC interrupts these days. > > > > Well lots of emulated devices don't yet. > > They probably should but it's nice to be able to > > test with e.g. e1000 emulation not just virtio. > > > e1000 doesn't support msi? qemu emulation doesn't. > > > > Besides, kvm_get_apic_interrupt > > simply doesn't know about the triggering mode at the moment. > > > > > -- > error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 15:14 ` Michael S. Tsirkin @ 2012-04-10 16:08 ` Avi Kivity 2012-04-10 17:06 ` Michael S. Tsirkin 0 siblings, 1 reply; 32+ messages in thread From: Avi Kivity @ 2012-04-10 16:08 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On 04/10/2012 06:14 PM, Michael S. Tsirkin wrote: > On Tue, Apr 10, 2012 at 06:00:51PM +0300, Avi Kivity wrote: > > On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote: > > > > > > > > > > Yes. But we can and it's easier than figuring out priorities. > > > > > I am guessing such collisions are rare, right? > > > > > > > > It's pretty easy, if there is something in IRR but > > > > kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance. > > > > > > I only see kvm_apic_has_interrupt - is this what you mean? > > > > Yes, sorry. > > > > It's not clear whether to do the check in kvm_apic_has_interrupt() or > > kvm_apic_get_interrupt() - the latter is called only after interrupts > > are enabled, so it looks like a better place (EOIs while interrupts are > > disabled have no effect). But need to make sure those functions are > > actually called, since they're protected by KVM_REQ_EVENT. > > Sorry not sure what you mean by "make sure" - read the code carefully? Yes. And I mean, get called at the right time. > > > > Better to keep everything per-cpu. The code is in virt/kvm/ioapic.c > > Hmm. Disabling for level handles the ack notifiers > issue as well, which I forgot about. > It's a tough call. You think looking at > TMR in kvm_get_apic_interrupt is safe? Yes, it's read only from the guest point of view IIRC. > > > > > > > Why do we care about > > > > level-triggered interrupts? Everything uses MSI or edge-triggered > > > > IOAPIC interrupts these days. > > > > > > Well lots of emulated devices don't yet. > > > They probably should but it's nice to be able to > > > test with e.g. e1000 emulation not just virtio. > > > > > > e1000 doesn't support msi? > > qemu emulation doesn't. > Can be changed if someone's really interested. But really, avoiding EOIs for e1000 won't help it much. -- error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 16:08 ` Avi Kivity @ 2012-04-10 17:06 ` Michael S. Tsirkin 0 siblings, 0 replies; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-10 17:06 UTC (permalink / raw) To: Avi Kivity; +Cc: kvm On Tue, Apr 10, 2012 at 07:08:26PM +0300, Avi Kivity wrote: > On 04/10/2012 06:14 PM, Michael S. Tsirkin wrote: > > On Tue, Apr 10, 2012 at 06:00:51PM +0300, Avi Kivity wrote: > > > On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote: > > > > > > > > > > > > Yes. But we can and it's easier than figuring out priorities. > > > > > > I am guessing such collisions are rare, right? > > > > > > > > > > It's pretty easy, if there is something in IRR but > > > > > kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance. > > > > > > > > I only see kvm_apic_has_interrupt - is this what you mean? > > > > > > Yes, sorry. > > > > > > It's not clear whether to do the check in kvm_apic_has_interrupt() or > > > kvm_apic_get_interrupt() - the latter is called only after interrupts > > > are enabled, so it looks like a better place (EOIs while interrupts are > > > disabled have no effect). But need to make sure those functions are > > > actually called, since they're protected by KVM_REQ_EVENT. > > > > Sorry not sure what you mean by "make sure" - read the code carefully? > > Yes. And I mean, get called at the right time. OK, Review will help here. > > > > > > Better to keep everything per-cpu. The code is in virt/kvm/ioapic.c > > > > Hmm. Disabling for level handles the ack notifiers > > issue as well, which I forgot about. > > It's a tough call. You think looking at > > TMR in kvm_get_apic_interrupt is safe? > > Yes, it's read only from the guest point of view IIRC. > > > > > > > > > > Why do we care about > > > > > level-triggered interrupts? Everything uses MSI or edge-triggered > > > > > IOAPIC interrupts these days. > > > > > > > > Well lots of emulated devices don't yet. > > > > They probably should but it's nice to be able to > > > > test with e.g. e1000 emulation not just virtio. > > > > > > > > > e1000 doesn't support msi? > > > > qemu emulation doesn't. > > > > Can be changed if someone's really interested. But really, avoiding > EOIs for e1000 won't help it much. It will help test EOI avoidance. > -- > error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 14:26 ` Michael S. Tsirkin 2012-04-10 14:33 ` Avi Kivity @ 2012-04-10 17:59 ` Gleb Natapov 2012-04-10 19:30 ` Michael S. Tsirkin 1 sibling, 1 reply; 32+ messages in thread From: Gleb Natapov @ 2012-04-10 17:59 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Avi Kivity, kvm Heh, I was working on that too. On Tue, Apr 10, 2012 at 05:26:18PM +0300, Michael S. Tsirkin wrote: > On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote: > > On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote: > > > I took a stub at implementing PV EOI using shared memory. > > > This should reduce the number of exits an interrupt > > > causes as much as by half. > > > > > > A partially complete draft for both host and guest parts > > > is below. > > > > > > The idea is simple: there's a bit, per APIC, in guest memory, > > > that tells the guest that it does not need EOI. > > > We set it before injecting an interrupt and clear > > > before injecting a nested one. Guest tests it using > > > a test and clear operation - this is necessary > > > so that host can detect interrupt nesting - > > > and if set, it can skip the EOI MSR. > > > > > > There's a new MSR to set the address of said register > > > in guest memory. Otherwise not much changes: > > > - Guest EOI is not required > > > - ISR is automatically cleared before injection > > > > > > Some things are incomplete: add feature negotiation > > > options, qemu support for said options. > > > No testing was done beyond compiling the kernel. > > > > > > I would appreciate early feedback. > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > -- > > > > > > diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h > > > index d854101..8430f41 100644 > > > --- a/arch/x86/include/asm/apic.h > > > +++ b/arch/x86/include/asm/apic.h > > > @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; } > > > > > > #endif /* CONFIG_X86_LOCAL_APIC */ > > > > > > +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi); > > > + > > > static inline void ack_APIC_irq(void) > > > { > > > + if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi))) > > > + return; > > > + > > > > While __test_and_clear_bit() is implemented in a single instruction, > > it's not required to be. Better have the instruction there explicitly. > > > > > /* > > > * ack_APIC_irq() actually gets compiled as a single instruction > > > * ... yummie. > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > > index e216ba0..0ee1472 100644 > > > --- a/arch/x86/include/asm/kvm_host.h > > > +++ b/arch/x86/include/asm/kvm_host.h > > > @@ -481,6 +481,12 @@ struct kvm_vcpu_arch { > > > u64 length; > > > u64 status; > > > } osvw; > > > + > > > + struct { > > > + u64 msr_val; > > > + struct gfn_to_hva_cache data; > > > + int vector; > > > + } eoi; > > > }; > > > > Needs to be cleared on INIT. > > You mean kvm_arch_vcpu_reset? > > > > > > > > > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void) > > > smp_processor_id()); > > > } > > > > > > + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) | > > > + MSR_KVM_EOI_ENABLED); > > > + > > > > Clear on kexec. > > With register_reboot_notifier? > > > > if (has_steal_clock) > > > kvm_register_steal_time(); > > > } > > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > > index 8584322..9e38e12 100644 > > > --- a/arch/x86/kvm/lapic.c > > > +++ b/arch/x86/kvm/lapic.c > > > @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) > > > irq->level, irq->trig_mode); > > > } > > > > > > -static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > > +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val) > > > +{ > > > + > > > + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val, > > > + sizeof(val)); > > > +} > > > + > > > +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val) > > > +{ > > > + > > > + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val, > > > + sizeof(*val)); > > > +} > > > + > > > +static inline bool eoi_enabled(struct kvm_vcpu *vcpu) > > > +{ > > > + return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED); > > > +} > > > + > > > +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu) > > > +{ > > > + u32 val; > > > + if (eoi_get_user(vcpu, &val) < 0) > > > + apic_debug("Can't read EOI MSR value: 0x%llx\n", > > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > > + if (!(val & 0x1)) > > > + vcpu->arch.eoi.vector = -1; > > > + return vcpu->arch.eoi.vector; > > > +} > > > + > > > +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector) > > > +{ > > > + BUG_ON(vcpu->arch.eoi.vector != -1); > > > + if (eoi_put_user(vcpu, 0x1) < 0) { > > > + apic_debug("Can't set EOI MSR value: 0x%llx\n", > > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > > + return; > > > + } > > > + vcpu->arch.eoi.vector = vector; > > > +} > > > + > > > +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu) > > > +{ > > > + int vector; > > > + vector = vcpu->arch.eoi.vector; > > > + if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) { > > > + apic_debug("Can't clear EOI MSR value: 0x%llx\n", > > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > > + return -1; > > > + } > > > + vcpu->arch.eoi.vector = -1; > > > + return vector; > > > +} > > > > > > > > > + > > > +static inline int __apic_find_highest_isr(struct kvm_lapic *apic) > > > { > > > int result; > > > > > > @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > > return result; > > > } > > > > > > +static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > > +{ > > > + int vector; > > > + if (eoi_enabled(apic->vcpu)) { > > > + vector = eoi_get_pending_vector(apic->vcpu); > > > + if (vector != -1) > > > + return vector; > > > + } > > > + return __apic_find_highest_isr(apic); > > > +} > > > > Why aren't you modifying the ISR unconfitionally? > > ISR is not set if there won't be an EOI > since it's EOI that normally clears it. > > > > + > > > static void apic_update_ppr(struct kvm_lapic *apic) > > > { > > > u32 tpr, isrv, ppr, old_ppr; > > > @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) > > > if (vector == -1) > > > return; > > > > > > + if (eoi_enabled(apic->vcpu)) > > > + eoi_clr_pending_vector(apic->vcpu); > > > apic_clear_vector(vector, apic->regs + APIC_ISR); > > > apic_update_ppr(apic); > > > > > > @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) > > > { > > > int vector = kvm_apic_has_interrupt(vcpu); > > > struct kvm_lapic *apic = vcpu->arch.apic; > > > + bool set_isr = true; > > > > > > if (vector == -1) > > > return -1; > > > > > > - apic_set_vector(vector, apic->regs + APIC_ISR); > > > + if (eoi_enabled(vcpu)) { > > > + /* Anything pending? If yes disable eoi optimization. */ > > > + if (unlikely(apic_find_highest_isr(apic) >= 0)) { > > > + int v = eoi_clr_pending_vector(vcpu); > > > > ISR != pending, that's IRR. If ISR vector has lower priority than the > > new vector, then we don't need to disable eoi avoidance. > > Yes. But we can and it's easier than figuring out priorities. > I am guessing such collisions are rare, right? > I'll add a trace to make sure. > > > > + if (v != -1) > > > + apic_set_vector(v, apic->regs + APIC_ISR); > > > + } else { > > > + eoi_set_pending_vector(vcpu, vector); > > > + set_isr = false; > > > > Weird. Just set it normally. Remember that reading the ISR needs to > > return the correct value. > > Marcelo said linux does not normally read ISR - not true? > Note this has no effect if the PV optimization is not enabled. > > > We need to process the avoided EOI before any APIC read/writes, to be > > sure the guest sees the updated values. Same for IOAPIC, EOI affects > > remote_irr. That may been we need to sample it after every exit, or > > perhaps disable the feature for level-triggered interrupts. > > Disabling would be very sad. Can we sample on remote irr read? > Nothing sad about it, just correct. MS requires this to be disabled for level triggered interrupts. We have to notify IOAPIC about EOI ASAP. It may hold another interrupt for us that has to be delivered. I was going to avoid most of the trickery in apic code and just check if avoided EOI should be processed on each exit. This adds one if on exit path instead of couple on interrupt injection path though. > > > + } > > > + } > > > + > > > + if (set_isr) > > > + apic_set_vector(vector, apic->regs + APIC_ISR); > > > apic_update_ppr(apic); > > > apic_clear_irr(vector, apic); > > > return vector; > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > > > > > -- > > error compiling committee.c: too many arguments to function > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 17:59 ` Gleb Natapov @ 2012-04-10 19:30 ` Michael S. Tsirkin 2012-04-10 19:33 ` Gleb Natapov 0 siblings, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-10 19:30 UTC (permalink / raw) To: Gleb Natapov; +Cc: Avi Kivity, kvm On Tue, Apr 10, 2012 at 08:59:21PM +0300, Gleb Natapov wrote: > Heh, I was working on that too. > > On Tue, Apr 10, 2012 at 05:26:18PM +0300, Michael S. Tsirkin wrote: > > On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote: > > > On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote: > > > > I took a stub at implementing PV EOI using shared memory. > > > > This should reduce the number of exits an interrupt > > > > causes as much as by half. > > > > > > > > A partially complete draft for both host and guest parts > > > > is below. > > > > > > > > The idea is simple: there's a bit, per APIC, in guest memory, > > > > that tells the guest that it does not need EOI. > > > > We set it before injecting an interrupt and clear > > > > before injecting a nested one. Guest tests it using > > > > a test and clear operation - this is necessary > > > > so that host can detect interrupt nesting - > > > > and if set, it can skip the EOI MSR. > > > > > > > > There's a new MSR to set the address of said register > > > > in guest memory. Otherwise not much changes: > > > > - Guest EOI is not required > > > > - ISR is automatically cleared before injection > > > > > > > > Some things are incomplete: add feature negotiation > > > > options, qemu support for said options. > > > > No testing was done beyond compiling the kernel. > > > > > > > > I would appreciate early feedback. > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > -- > > > > > > > > diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h > > > > index d854101..8430f41 100644 > > > > --- a/arch/x86/include/asm/apic.h > > > > +++ b/arch/x86/include/asm/apic.h > > > > @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; } > > > > > > > > #endif /* CONFIG_X86_LOCAL_APIC */ > > > > > > > > +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi); > > > > + > > > > static inline void ack_APIC_irq(void) > > > > { > > > > + if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi))) > > > > + return; > > > > + > > > > > > While __test_and_clear_bit() is implemented in a single instruction, > > > it's not required to be. Better have the instruction there explicitly. > > > > > > > /* > > > > * ack_APIC_irq() actually gets compiled as a single instruction > > > > * ... yummie. > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > > > index e216ba0..0ee1472 100644 > > > > --- a/arch/x86/include/asm/kvm_host.h > > > > +++ b/arch/x86/include/asm/kvm_host.h > > > > @@ -481,6 +481,12 @@ struct kvm_vcpu_arch { > > > > u64 length; > > > > u64 status; > > > > } osvw; > > > > + > > > > + struct { > > > > + u64 msr_val; > > > > + struct gfn_to_hva_cache data; > > > > + int vector; > > > > + } eoi; > > > > }; > > > > > > Needs to be cleared on INIT. > > > > You mean kvm_arch_vcpu_reset? > > > > > > > > > > > > > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void) > > > > smp_processor_id()); > > > > } > > > > > > > > + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) | > > > > + MSR_KVM_EOI_ENABLED); > > > > + > > > > > > Clear on kexec. > > > > With register_reboot_notifier? > > > > > > if (has_steal_clock) > > > > kvm_register_steal_time(); > > > > } > > > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > > > index 8584322..9e38e12 100644 > > > > --- a/arch/x86/kvm/lapic.c > > > > +++ b/arch/x86/kvm/lapic.c > > > > @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) > > > > irq->level, irq->trig_mode); > > > > } > > > > > > > > -static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > > > +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val) > > > > +{ > > > > + > > > > + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val, > > > > + sizeof(val)); > > > > +} > > > > + > > > > +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val) > > > > +{ > > > > + > > > > + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val, > > > > + sizeof(*val)); > > > > +} > > > > + > > > > +static inline bool eoi_enabled(struct kvm_vcpu *vcpu) > > > > +{ > > > > + return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED); > > > > +} > > > > + > > > > +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu) > > > > +{ > > > > + u32 val; > > > > + if (eoi_get_user(vcpu, &val) < 0) > > > > + apic_debug("Can't read EOI MSR value: 0x%llx\n", > > > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > > > + if (!(val & 0x1)) > > > > + vcpu->arch.eoi.vector = -1; > > > > + return vcpu->arch.eoi.vector; > > > > +} > > > > + > > > > +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector) > > > > +{ > > > > + BUG_ON(vcpu->arch.eoi.vector != -1); > > > > + if (eoi_put_user(vcpu, 0x1) < 0) { > > > > + apic_debug("Can't set EOI MSR value: 0x%llx\n", > > > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > > > + return; > > > > + } > > > > + vcpu->arch.eoi.vector = vector; > > > > +} > > > > + > > > > +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu) > > > > +{ > > > > + int vector; > > > > + vector = vcpu->arch.eoi.vector; > > > > + if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) { > > > > + apic_debug("Can't clear EOI MSR value: 0x%llx\n", > > > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > > > + return -1; > > > > + } > > > > + vcpu->arch.eoi.vector = -1; > > > > + return vector; > > > > +} > > > > > > > > > > > > > + > > > > +static inline int __apic_find_highest_isr(struct kvm_lapic *apic) > > > > { > > > > int result; > > > > > > > > @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > > > return result; > > > > } > > > > > > > > +static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > > > +{ > > > > + int vector; > > > > + if (eoi_enabled(apic->vcpu)) { > > > > + vector = eoi_get_pending_vector(apic->vcpu); > > > > + if (vector != -1) > > > > + return vector; > > > > + } > > > > + return __apic_find_highest_isr(apic); > > > > +} > > > > > > Why aren't you modifying the ISR unconfitionally? > > > > ISR is not set if there won't be an EOI > > since it's EOI that normally clears it. > > > > > > + > > > > static void apic_update_ppr(struct kvm_lapic *apic) > > > > { > > > > u32 tpr, isrv, ppr, old_ppr; > > > > @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) > > > > if (vector == -1) > > > > return; > > > > > > > > + if (eoi_enabled(apic->vcpu)) > > > > + eoi_clr_pending_vector(apic->vcpu); > > > > apic_clear_vector(vector, apic->regs + APIC_ISR); > > > > apic_update_ppr(apic); > > > > > > > > @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) > > > > { > > > > int vector = kvm_apic_has_interrupt(vcpu); > > > > struct kvm_lapic *apic = vcpu->arch.apic; > > > > + bool set_isr = true; > > > > > > > > if (vector == -1) > > > > return -1; > > > > > > > > - apic_set_vector(vector, apic->regs + APIC_ISR); > > > > + if (eoi_enabled(vcpu)) { > > > > + /* Anything pending? If yes disable eoi optimization. */ > > > > + if (unlikely(apic_find_highest_isr(apic) >= 0)) { > > > > + int v = eoi_clr_pending_vector(vcpu); > > > > > > ISR != pending, that's IRR. If ISR vector has lower priority than the > > > new vector, then we don't need to disable eoi avoidance. > > > > Yes. But we can and it's easier than figuring out priorities. > > I am guessing such collisions are rare, right? > > I'll add a trace to make sure. > > > > > > + if (v != -1) > > > > + apic_set_vector(v, apic->regs + APIC_ISR); > > > > + } else { > > > > + eoi_set_pending_vector(vcpu, vector); > > > > + set_isr = false; > > > > > > Weird. Just set it normally. Remember that reading the ISR needs to > > > return the correct value. > > > > Marcelo said linux does not normally read ISR - not true? > > Note this has no effect if the PV optimization is not enabled. > > > > > We need to process the avoided EOI before any APIC read/writes, to be > > > sure the guest sees the updated values. Same for IOAPIC, EOI affects > > > remote_irr. That may been we need to sample it after every exit, or > > > perhaps disable the feature for level-triggered interrupts. > > > > Disabling would be very sad. Can we sample on remote irr read? > > > Nothing sad about it, just correct. MS requires this to be disabled for > level triggered interrupts. We don't try to match what HV does 100% anyway. > We have to notify IOAPIC about EOI ASAP. It > may hold another interrupt for us that has to be delivered. You mean the ack notifiers? We can skip just for the vectors which have ack notifiers or only if there are no notifiers. > I was going to avoid most of the trickery in apic code and just check if > avoided EOI should be processed on each exit. This adds one if on exit > path instead of couple on interrupt injection path though. > > > > > + } > > > > + } > > > > + > > > > + if (set_isr) > > > > + apic_set_vector(vector, apic->regs + APIC_ISR); > > > > apic_update_ppr(apic); > > > > apic_clear_irr(vector, apic); > > > > return vector; > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > > > > > > > > -- > > > error compiling committee.c: too many arguments to function > > -- > > To unsubscribe from this list: send the line "unsubscribe kvm" in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- > Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 19:30 ` Michael S. Tsirkin @ 2012-04-10 19:33 ` Gleb Natapov 2012-04-10 19:40 ` Michael S. Tsirkin 0 siblings, 1 reply; 32+ messages in thread From: Gleb Natapov @ 2012-04-10 19:33 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Avi Kivity, kvm On Tue, Apr 10, 2012 at 10:30:04PM +0300, Michael S. Tsirkin wrote: > On Tue, Apr 10, 2012 at 08:59:21PM +0300, Gleb Natapov wrote: > > Heh, I was working on that too. > > > > On Tue, Apr 10, 2012 at 05:26:18PM +0300, Michael S. Tsirkin wrote: > > > On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote: > > > > On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote: > > > > > I took a stub at implementing PV EOI using shared memory. > > > > > This should reduce the number of exits an interrupt > > > > > causes as much as by half. > > > > > > > > > > A partially complete draft for both host and guest parts > > > > > is below. > > > > > > > > > > The idea is simple: there's a bit, per APIC, in guest memory, > > > > > that tells the guest that it does not need EOI. > > > > > We set it before injecting an interrupt and clear > > > > > before injecting a nested one. Guest tests it using > > > > > a test and clear operation - this is necessary > > > > > so that host can detect interrupt nesting - > > > > > and if set, it can skip the EOI MSR. > > > > > > > > > > There's a new MSR to set the address of said register > > > > > in guest memory. Otherwise not much changes: > > > > > - Guest EOI is not required > > > > > - ISR is automatically cleared before injection > > > > > > > > > > Some things are incomplete: add feature negotiation > > > > > options, qemu support for said options. > > > > > No testing was done beyond compiling the kernel. > > > > > > > > > > I would appreciate early feedback. > > > > > > > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > > > > > > > > > -- > > > > > > > > > > diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h > > > > > index d854101..8430f41 100644 > > > > > --- a/arch/x86/include/asm/apic.h > > > > > +++ b/arch/x86/include/asm/apic.h > > > > > @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; } > > > > > > > > > > #endif /* CONFIG_X86_LOCAL_APIC */ > > > > > > > > > > +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi); > > > > > + > > > > > static inline void ack_APIC_irq(void) > > > > > { > > > > > + if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi))) > > > > > + return; > > > > > + > > > > > > > > While __test_and_clear_bit() is implemented in a single instruction, > > > > it's not required to be. Better have the instruction there explicitly. > > > > > > > > > /* > > > > > * ack_APIC_irq() actually gets compiled as a single instruction > > > > > * ... yummie. > > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > > > > index e216ba0..0ee1472 100644 > > > > > --- a/arch/x86/include/asm/kvm_host.h > > > > > +++ b/arch/x86/include/asm/kvm_host.h > > > > > @@ -481,6 +481,12 @@ struct kvm_vcpu_arch { > > > > > u64 length; > > > > > u64 status; > > > > > } osvw; > > > > > + > > > > > + struct { > > > > > + u64 msr_val; > > > > > + struct gfn_to_hva_cache data; > > > > > + int vector; > > > > > + } eoi; > > > > > }; > > > > > > > > Needs to be cleared on INIT. > > > > > > You mean kvm_arch_vcpu_reset? > > > > > > > > > > > > > > > > > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void) > > > > > smp_processor_id()); > > > > > } > > > > > > > > > > + wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) | > > > > > + MSR_KVM_EOI_ENABLED); > > > > > + > > > > > > > > Clear on kexec. > > > > > > With register_reboot_notifier? > > > > > > > > if (has_steal_clock) > > > > > kvm_register_steal_time(); > > > > > } > > > > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > > > > index 8584322..9e38e12 100644 > > > > > --- a/arch/x86/kvm/lapic.c > > > > > +++ b/arch/x86/kvm/lapic.c > > > > > @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) > > > > > irq->level, irq->trig_mode); > > > > > } > > > > > > > > > > -static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > > > > +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val) > > > > > +{ > > > > > + > > > > > + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val, > > > > > + sizeof(val)); > > > > > +} > > > > > + > > > > > +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val) > > > > > +{ > > > > > + > > > > > + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val, > > > > > + sizeof(*val)); > > > > > +} > > > > > + > > > > > +static inline bool eoi_enabled(struct kvm_vcpu *vcpu) > > > > > +{ > > > > > + return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED); > > > > > +} > > > > > + > > > > > +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu) > > > > > +{ > > > > > + u32 val; > > > > > + if (eoi_get_user(vcpu, &val) < 0) > > > > > + apic_debug("Can't read EOI MSR value: 0x%llx\n", > > > > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > > > > + if (!(val & 0x1)) > > > > > + vcpu->arch.eoi.vector = -1; > > > > > + return vcpu->arch.eoi.vector; > > > > > +} > > > > > + > > > > > +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector) > > > > > +{ > > > > > + BUG_ON(vcpu->arch.eoi.vector != -1); > > > > > + if (eoi_put_user(vcpu, 0x1) < 0) { > > > > > + apic_debug("Can't set EOI MSR value: 0x%llx\n", > > > > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > > > > + return; > > > > > + } > > > > > + vcpu->arch.eoi.vector = vector; > > > > > +} > > > > > + > > > > > +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu) > > > > > +{ > > > > > + int vector; > > > > > + vector = vcpu->arch.eoi.vector; > > > > > + if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) { > > > > > + apic_debug("Can't clear EOI MSR value: 0x%llx\n", > > > > > + (unsigned long long)vcpi->arch.eoi.msr_val); > > > > > + return -1; > > > > > + } > > > > > + vcpu->arch.eoi.vector = -1; > > > > > + return vector; > > > > > +} > > > > > > > > > > > > > > > > > + > > > > > +static inline int __apic_find_highest_isr(struct kvm_lapic *apic) > > > > > { > > > > > int result; > > > > > > > > > > @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > > > > return result; > > > > > } > > > > > > > > > > +static inline int apic_find_highest_isr(struct kvm_lapic *apic) > > > > > +{ > > > > > + int vector; > > > > > + if (eoi_enabled(apic->vcpu)) { > > > > > + vector = eoi_get_pending_vector(apic->vcpu); > > > > > + if (vector != -1) > > > > > + return vector; > > > > > + } > > > > > + return __apic_find_highest_isr(apic); > > > > > +} > > > > > > > > Why aren't you modifying the ISR unconfitionally? > > > > > > ISR is not set if there won't be an EOI > > > since it's EOI that normally clears it. > > > > > > > > + > > > > > static void apic_update_ppr(struct kvm_lapic *apic) > > > > > { > > > > > u32 tpr, isrv, ppr, old_ppr; > > > > > @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) > > > > > if (vector == -1) > > > > > return; > > > > > > > > > > + if (eoi_enabled(apic->vcpu)) > > > > > + eoi_clr_pending_vector(apic->vcpu); > > > > > apic_clear_vector(vector, apic->regs + APIC_ISR); > > > > > apic_update_ppr(apic); > > > > > > > > > > @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) > > > > > { > > > > > int vector = kvm_apic_has_interrupt(vcpu); > > > > > struct kvm_lapic *apic = vcpu->arch.apic; > > > > > + bool set_isr = true; > > > > > > > > > > if (vector == -1) > > > > > return -1; > > > > > > > > > > - apic_set_vector(vector, apic->regs + APIC_ISR); > > > > > + if (eoi_enabled(vcpu)) { > > > > > + /* Anything pending? If yes disable eoi optimization. */ > > > > > + if (unlikely(apic_find_highest_isr(apic) >= 0)) { > > > > > + int v = eoi_clr_pending_vector(vcpu); > > > > > > > > ISR != pending, that's IRR. If ISR vector has lower priority than the > > > > new vector, then we don't need to disable eoi avoidance. > > > > > > Yes. But we can and it's easier than figuring out priorities. > > > I am guessing such collisions are rare, right? > > > I'll add a trace to make sure. > > > > > > > > + if (v != -1) > > > > > + apic_set_vector(v, apic->regs + APIC_ISR); > > > > > + } else { > > > > > + eoi_set_pending_vector(vcpu, vector); > > > > > + set_isr = false; > > > > > > > > Weird. Just set it normally. Remember that reading the ISR needs to > > > > return the correct value. > > > > > > Marcelo said linux does not normally read ISR - not true? > > > Note this has no effect if the PV optimization is not enabled. > > > > > > > We need to process the avoided EOI before any APIC read/writes, to be > > > > sure the guest sees the updated values. Same for IOAPIC, EOI affects > > > > remote_irr. That may been we need to sample it after every exit, or > > > > perhaps disable the feature for level-triggered interrupts. > > > > > > Disabling would be very sad. Can we sample on remote irr read? > > > > > Nothing sad about it, just correct. MS requires this to be disabled for > > level triggered interrupts. > > We don't try to match what HV does 100% anyway. > We should. The same code will be used for HV. > > We have to notify IOAPIC about EOI ASAP. It > > may hold another interrupt for us that has to be delivered. > > You mean the ack notifiers? We can skip just for the vectors > which have ack notifiers or only if there are no notifiers. > No. I mean: if (!ent->fields.mask && (ioapic->irr & (1 << i))) ioapic_service(ioapic, i); > > I was going to avoid most of the trickery in apic code and just check if > > avoided EOI should be processed on each exit. This adds one if on exit > > path instead of couple on interrupt injection path though. > > > > > > > + } > > > > > + } > > > > > + > > > > > + if (set_isr) > > > > > + apic_set_vector(vector, apic->regs + APIC_ISR); > > > > > apic_update_ppr(apic); > > > > > apic_clear_irr(vector, apic); > > > > > return vector; > > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > > > > > > > > > > > -- > > > > error compiling committee.c: too many arguments to function > > > -- > > > To unsubscribe from this list: send the line "unsubscribe kvm" in > > > the body of a message to majordomo@vger.kernel.org > > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > > > -- > > Gleb. -- Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 19:33 ` Gleb Natapov @ 2012-04-10 19:40 ` Michael S. Tsirkin 2012-04-10 19:42 ` Gleb Natapov 0 siblings, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-10 19:40 UTC (permalink / raw) To: Gleb Natapov; +Cc: Avi Kivity, kvm On Tue, Apr 10, 2012 at 10:33:54PM +0300, Gleb Natapov wrote: > > We don't try to match what HV does 100% anyway. > > > We should. The same code will be used for HV. Only where it makes sense, that is where the functionality is sufficiently similar. > > > We have to notify IOAPIC about EOI ASAP. It > > > may hold another interrupt for us that has to be delivered. > > > > You mean the ack notifiers? We can skip just for the vectors > > which have ack notifiers or only if there are no notifiers. > > > No. I mean: > > if (!ent->fields.mask && (ioapic->irr & (1 << i))) > ioapic_service(ioapic, i); Hmm. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 19:40 ` Michael S. Tsirkin @ 2012-04-10 19:42 ` Gleb Natapov 0 siblings, 0 replies; 32+ messages in thread From: Gleb Natapov @ 2012-04-10 19:42 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Avi Kivity, kvm On Tue, Apr 10, 2012 at 10:40:14PM +0300, Michael S. Tsirkin wrote: > On Tue, Apr 10, 2012 at 10:33:54PM +0300, Gleb Natapov wrote: > > > We don't try to match what HV does 100% anyway. > > > > > We should. The same code will be used for HV. > > Only where it makes sense, that is where the functionality > is sufficiently similar. > You can sprinkle additional ifs in the code, but I do not see the point. > > > > We have to notify IOAPIC about EOI ASAP. It > > > > may hold another interrupt for us that has to be delivered. > > > > > > You mean the ack notifiers? We can skip just for the vectors > > > which have ack notifiers or only if there are no notifiers. > > > > > No. I mean: > > > > if (!ent->fields.mask && (ioapic->irr & (1 << i))) > > ioapic_service(ioapic, i); > > Hmm. -- Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-10 13:27 [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory Michael S. Tsirkin 2012-04-10 14:03 ` Avi Kivity @ 2012-04-15 16:18 ` Michael S. Tsirkin 2012-04-16 10:08 ` Gleb Natapov 1 sibling, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-15 16:18 UTC (permalink / raw) To: kvm I got lots of useful feedback from v0 so I thought sending out a brain dump again would be a good idea. This is mainly to show how I'm trying to address the comments I got from the previous round. Flames/feedback are wellcome! Changes from v0: - Tweaked setup MSRs a bit - Keep ISR bit set. Before reading ISR, test EOI in guest memory and clear - Check priority for nested interrupts, we can enable optimization if new one is high priority - Disable optimization for any interrupt handled by ioapic (This is because ioapic handles notifiers and pic and it generally gets messy. It's possible that we can optimize some ioapic-handled edge interrupts - but is it worth it?) - A less intrusive change for guest apic (0 overhead without kvm) --- I took a stub at implementing PV EOI using shared memory. This should reduce the number of exits an interrupt causes as much as by half. A partially complete draft for both host and guest parts is below. The idea is simple: there's a bit, per APIC, in guest memory, that tells the guest that it does not need EOI. We set it before injecting an interrupt and clear before injecting a nested one. Guest tests it using a test and clear operation - this is necessary so that host can detect interrupt nesting - and if set, it can skip the EOI MSR. There's a new MSR to set the address of said register in guest memory. Otherwise not much changed: - Guest EOI is not required - Register is tested ISR is automatically cleared before injection qemu support is incomplete - mostly for feature negotiation. need to add some trace points to enable profiling. No testing was done beyond compiling the kernel. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index b97596e..c9c70ea 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -26,11 +26,13 @@ #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) /* Technically wrong, but this avoids compilation errors on some gcc versions. */ -#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) +#define BITOP_ADDR_CONSTRAINT "=m" #else -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) +#define BITOP_ADDR_CONSTRAINT "+m" #endif +#define BITOP_ADDR(x) BITOP_ADDR_CONSTRAINT (*(volatile long *) (x)) + #define ADDR BITOP_ADDR(addr) /* diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e216ba0..3d09ef1 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -481,6 +481,12 @@ struct kvm_vcpu_arch { u64 length; u64 status; } osvw; + + struct { + u64 msr_val; + struct gfn_to_hva_cache data; + bool pending; + } eoi; }; struct kvm_lpage_info { diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 734c376..164376a 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -22,6 +22,7 @@ #define KVM_FEATURE_CLOCKSOURCE2 3 #define KVM_FEATURE_ASYNC_PF 4 #define KVM_FEATURE_STEAL_TIME 5 +#define KVM_FEATURE_EOI 6 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. @@ -37,6 +38,8 @@ #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_STEAL_TIME 0x4b564d03 +#define MSR_KVM_EOI_EN 0x4b564d04 +#define MSR_KVM_EOI_DISABLED 0x0L struct kvm_steal_time { __u64 steal; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index b8ba6e4..450aae4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -39,6 +39,7 @@ #include <asm/desc.h> #include <asm/tlbflush.h> #include <asm/idle.h> +#include <asm/apic.h> static int kvmapf = 1; @@ -290,6 +291,33 @@ static void kvm_register_steal_time(void) cpu, __pa(st)); } +/* TODO: needs to be early? aligned? */ +static DEFINE_EARLY_PER_CPU(u8, apic_eoi, 0); + +/* Our own copy of __test_and_clear_bit to make sure + * it is done with a single instruction */ +static inline int kvm_test_and_clear_bit(int nr, volatile u8* addr) +{ + int oldbit; + + asm volatile("btr %2,%1\n\t" + "sbb %0,%0" + : "=r" (oldbit), + BITOP_ADDR_CONSTRAINT (*(volatile u8 *) (addr)) + : "Ir" (nr)); + return oldbit; +} + +static void (*kvm_guest_native_apic_write)(u32 reg, u32 val); +static void kvm_guest_apic_write(u32 reg, u32 val) +{ + if (reg == APIC_EOI && + kvm_test_and_clear_bit(0, &__get_cpu_var(apic_eoi))) + return; + + kvm_guest_native_apic_write(reg, val); +} + void __cpuinit kvm_guest_cpu_init(void) { if (!kvm_para_available()) @@ -307,11 +335,18 @@ void __cpuinit kvm_guest_cpu_init(void) smp_processor_id()); } + + if (kvm_para_has_feature(KVM_FEATURE_EOI)) { + kvm_guest_native_apic_write = apic->write; + apic->write = kvm_guest_apic_write; + wrmsrl(MSR_KVM_EOI_EN, __pa(&__get_cpu_var(apic_eoi))); + } + if (has_steal_clock) kvm_register_steal_time(); } -static void kvm_pv_disable_apf(void *unused) +static void kvm_pv_disable_apf(void) { if (!__get_cpu_var(apf_reason).enabled) return; @@ -323,11 +358,18 @@ static void kvm_pv_disable_apf(void *unused) smp_processor_id()); } +static void kvm_pv_guest_cpu_reboot(void *unused) +{ + if (kvm_para_has_feature(KVM_FEATURE_EOI)) + wrmsrl(MSR_KVM_EOI_EN, MSR_KVM_EOI_DISABLED); + kvm_pv_disable_apf(); +} + static int kvm_pv_reboot_notify(struct notifier_block *nb, unsigned long code, void *unused) { if (code == SYS_RESTART) - on_each_cpu(kvm_pv_disable_apf, NULL, 1); + on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); return NOTIFY_DONE; } @@ -378,7 +420,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) static void kvm_guest_cpu_offline(void *dummy) { kvm_disable_steal_time(); - kvm_pv_disable_apf(NULL); + if (kvm_para_has_feature(KVM_FEATURE_EOI)) + wrmsrl(MSR_KVM_EOI_EN, MSR_KVM_EOI_DISABLED); + kvm_pv_disable_apf(); apf_task_wake_all(); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 9fed5be..7d00d2d 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -408,6 +408,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_NOP_IO_DELAY) | (1 << KVM_FEATURE_CLOCKSOURCE2) | (1 << KVM_FEATURE_ASYNC_PF) | + (1 << KVM_FEATURE_EOI) | (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); if (sched_info_on()) diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 7e06ba1..07bdfab 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -48,7 +48,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.pending; - if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ + if (kvm_apic_has_interrupt(v) < 0) { /* LAPIC */ if (kvm_apic_accept_pic_intr(v)) { s = pic_irqchip(v->kvm); /* PIC */ return s->output; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 992b4ea..c2118ab 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -270,6 +270,54 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) irq->level, irq->trig_mode); } +static int eoi_put_user(struct kvm_vcpu *vcpu, u8 val) +{ + + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val, + sizeof(val)); +} + +static int eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) +{ + + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val, + sizeof(*val)); +} + +static inline bool eoi_enabled(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.eoi.msr_val != MSR_KVM_EOI_DISABLED; +} + +static bool eoi_get_pending(struct kvm_vcpu *vcpu) +{ + u8 val; + if (eoi_get_user(vcpu, &val) < 0) + apic_debug("Can't read EOI MSR value: 0x%llx\n", + (unsigned long long)vcpi->arch.eoi.msr_val); + return val & 0x1; +} + +static void eoi_set_pending(struct kvm_vcpu *vcpu) +{ + if (eoi_put_user(vcpu, 0x1) < 0) { + apic_debug("Can't set EOI MSR value: 0x%llx\n", + (unsigned long long)vcpi->arch.eoi.msr_val); + return; + } + vcpu->arch.eoi.pending = true; +} + +static void eoi_clr_pending(struct kvm_vcpu *vcpu) +{ + if (eoi_put_user(vcpu, 0x0) < 0) { + apic_debug("Can't clear EOI MSR value: 0x%llx\n", + (unsigned long long)vcpi->arch.eoi.msr_val); + return; + } + vcpu->arch.eoi.pending = false; +} + static inline int apic_find_highest_isr(struct kvm_lapic *apic) { int result; @@ -280,6 +328,20 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) return result; } +static void apic_update_isr(struct kvm_lapic *apic) +{ + int vector; + if (!eoi_enabled(apic->vcpu) || + !apic->vcpu->arch.eoi.pending || + eoi_get_pending(apic->vcpu)) + return; + apic->vcpu->arch.eoi.pending = false; + vector = apic_find_highest_isr(apic); + if (vector == -1) + return; + apic_clear_vector(vector, apic->regs + APIC_ISR); +} + static void apic_update_ppr(struct kvm_lapic *apic) { u32 tpr, isrv, ppr, old_ppr; @@ -287,6 +349,7 @@ static void apic_update_ppr(struct kvm_lapic *apic) old_ppr = apic_get_reg(apic, APIC_PROCPRI); tpr = apic_get_reg(apic, APIC_TASKPRI); + apic_update_isr(apic); isr = apic_find_highest_isr(apic); isrv = (isr != -1) ? isr : 0; @@ -492,6 +555,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) if (vector == -1) return; + if (eoi_enabled(apic->vcpu)) + eoi_clr_pending(apic->vcpu); apic_clear_vector(vector, apic->regs + APIC_ISR); apic_update_ppr(apic); @@ -1085,6 +1150,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) atomic_set(&apic->lapic_timer.pending, 0); if (kvm_vcpu_is_bsp(vcpu)) vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; + vcpu->arch.eoi.msr_val = MSR_KVM_EOI_DISABLED; + vcpu->arch.eoi.pending = false; apic_update_ppr(apic); vcpu->arch.apic_arb_prio = 0; @@ -1210,9 +1277,10 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) apic_update_ppr(apic); highest_irr = apic_find_highest_irr(apic); - if ((highest_irr == -1) || - ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) + if (highest_irr == -1) return -1; + if (((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) + return -2; return highest_irr; } @@ -1244,9 +1312,20 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; - if (vector == -1) + /* Detect interrupt nesting and disable EOI optimization */ + if (eoi_enabled(vcpu) && vector == -2) + eoi_clr_pending(vcpu); + + if (vector < 0) return -1; + if (eoi_enabled(vcpu)) { + if (kvm_ioapic_handles_vector(vcpu->kvm, vector)) + eoi_clr_pending(vcpu); + else + eoi_set_pending(vcpu); + } + apic_set_vector(vector, apic->regs + APIC_ISR); apic_update_ppr(apic); apic_clear_irr(vector, apic); @@ -1261,6 +1340,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) MSR_IA32_APICBASE_BASE; kvm_apic_set_version(vcpu); + if (eoi_enabled(apic->vcpu)) + apic->vcpu->arch.eoi.pending = eoi_get_pending(apic->vcpu); apic_update_ppr(apic); hrtimer_cancel(&apic->lapic_timer.timer); update_divide_count(apic); @@ -1392,3 +1473,18 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) return 0; } + +int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data) +{ + if (data == MSR_KVM_EOI_DISABLED) { + struct kvm_lapic *apic = vcpu->arch.apic; + if (apic && apic_enabled(apic)) + apic_update_isr(apic); + } else if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data, + data)) + return 1; + + vcpu->arch.eoi.msr_val = data; + vcpu->arch.eoi.pending = false; + return 0; +} diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 6f4ce25..042dace 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -60,4 +60,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) { return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; } + +int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4044ce0..31d6762 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1627,6 +1627,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (kvm_pv_enable_async_pf(vcpu, data)) return 1; break; + case MSR_KVM_EOI_EN: + if (kvm_pv_enable_apic_eoi(vcpu, data)) + return 1; + break; case MSR_KVM_STEAL_TIME: if (unlikely(!sched_info_on())) ^ permalink raw reply related [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-15 16:18 ` [PATCHv1 " Michael S. Tsirkin @ 2012-04-16 10:08 ` Gleb Natapov 2012-04-16 11:09 ` Michael S. Tsirkin 2012-04-17 9:22 ` Avi Kivity 0 siblings, 2 replies; 32+ messages in thread From: Gleb Natapov @ 2012-04-16 10:08 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On Sun, Apr 15, 2012 at 07:18:58PM +0300, Michael S. Tsirkin wrote: > I got lots of useful feedback from v0 so I thought > sending out a brain dump again would be a good idea. > This is mainly to show how I'm trying to address the > comments I got from the previous round. Flames/feedback > are wellcome! > > Changes from v0: > - Tweaked setup MSRs a bit > - Keep ISR bit set. Before reading ISR, test EOI in guest memory and clear > - Check priority for nested interrupts, we can > enable optimization if new one is high priority > - Disable optimization for any interrupt handled by ioapic > (This is because ioapic handles notifiers and pic and it generally gets messy. > It's possible that we can optimize some ioapic-handled > edge interrupts - but is it worth it?) > - A less intrusive change for guest apic (0 overhead without kvm) > > --- > > I took a stub at implementing PV EOI using shared memory. > This should reduce the number of exits an interrupt > causes as much as by half. > > A partially complete draft for both host and guest parts > is below. > > The idea is simple: there's a bit, per APIC, in guest memory, > that tells the guest that it does not need EOI. > We set it before injecting an interrupt and clear > before injecting a nested one. Guest tests it using > a test and clear operation - this is necessary > so that host can detect interrupt nesting - > and if set, it can skip the EOI MSR. > > There's a new MSR to set the address of said register > in guest memory. Otherwise not much changed: > - Guest EOI is not required > - Register is tested ISR is automatically cleared before injection > > qemu support is incomplete - mostly for feature negotiation. > need to add some trace points to enable profiling. > No testing was done beyond compiling the kernel. > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com> > > diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h > index b97596e..c9c70ea 100644 > --- a/arch/x86/include/asm/bitops.h > +++ b/arch/x86/include/asm/bitops.h > @@ -26,11 +26,13 @@ > #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) > /* Technically wrong, but this avoids compilation errors on some gcc > versions. */ > -#define BITOP_ADDR(x) "=m" (*(volatile long *) (x)) > +#define BITOP_ADDR_CONSTRAINT "=m" > #else > -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x)) > +#define BITOP_ADDR_CONSTRAINT "+m" > #endif > > +#define BITOP_ADDR(x) BITOP_ADDR_CONSTRAINT (*(volatile long *) (x)) > + > #define ADDR BITOP_ADDR(addr) > > /* > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index e216ba0..3d09ef1 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -481,6 +481,12 @@ struct kvm_vcpu_arch { > u64 length; > u64 status; > } osvw; > + > + struct { > + u64 msr_val; > + struct gfn_to_hva_cache data; > + bool pending; > + } eoi; > }; > > struct kvm_lpage_info { > diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h > index 734c376..164376a 100644 > --- a/arch/x86/include/asm/kvm_para.h > +++ b/arch/x86/include/asm/kvm_para.h > @@ -22,6 +22,7 @@ > #define KVM_FEATURE_CLOCKSOURCE2 3 > #define KVM_FEATURE_ASYNC_PF 4 > #define KVM_FEATURE_STEAL_TIME 5 > +#define KVM_FEATURE_EOI 6 > > /* The last 8 bits are used to indicate how to interpret the flags field > * in pvclock structure. If no bits are set, all flags are ignored. > @@ -37,6 +38,8 @@ > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > #define MSR_KVM_STEAL_TIME 0x4b564d03 > +#define MSR_KVM_EOI_EN 0x4b564d04 > +#define MSR_KVM_EOI_DISABLED 0x0L This is valid gpa. Follow others MSR example i.e align the address to, lets say dword, and use lsb as enable bit. Other low bits are reserved (#GP if set). And please move defines that not define new MSRs to some other place. > > struct kvm_steal_time { > __u64 steal; > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index b8ba6e4..450aae4 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -39,6 +39,7 @@ > #include <asm/desc.h> > #include <asm/tlbflush.h> > #include <asm/idle.h> > +#include <asm/apic.h> > > static int kvmapf = 1; > > @@ -290,6 +291,33 @@ static void kvm_register_steal_time(void) > cpu, __pa(st)); > } > > +/* TODO: needs to be early? aligned? */ > +static DEFINE_EARLY_PER_CPU(u8, apic_eoi, 0); > + > +/* Our own copy of __test_and_clear_bit to make sure > + * it is done with a single instruction */ > +static inline int kvm_test_and_clear_bit(int nr, volatile u8* addr) > +{ > + int oldbit; > + > + asm volatile("btr %2,%1\n\t" > + "sbb %0,%0" > + : "=r" (oldbit), > + BITOP_ADDR_CONSTRAINT (*(volatile u8 *) (addr)) > + : "Ir" (nr)); > + return oldbit; > +} > + > +static void (*kvm_guest_native_apic_write)(u32 reg, u32 val); > +static void kvm_guest_apic_write(u32 reg, u32 val) > +{ > + if (reg == APIC_EOI && > + kvm_test_and_clear_bit(0, &__get_cpu_var(apic_eoi))) > + return; > + > + kvm_guest_native_apic_write(reg, val); > +} > + > void __cpuinit kvm_guest_cpu_init(void) > { > if (!kvm_para_available()) > @@ -307,11 +335,18 @@ void __cpuinit kvm_guest_cpu_init(void) > smp_processor_id()); > } > > + > + if (kvm_para_has_feature(KVM_FEATURE_EOI)) { > + kvm_guest_native_apic_write = apic->write; > + apic->write = kvm_guest_apic_write; > + wrmsrl(MSR_KVM_EOI_EN, __pa(&__get_cpu_var(apic_eoi))); > + } Can happen on more then one cpu. After it happens on two kvm_guest_apic_write() will call to itself. > + > if (has_steal_clock) > kvm_register_steal_time(); > } > > -static void kvm_pv_disable_apf(void *unused) > +static void kvm_pv_disable_apf(void) > { > if (!__get_cpu_var(apf_reason).enabled) > return; > @@ -323,11 +358,18 @@ static void kvm_pv_disable_apf(void *unused) > smp_processor_id()); > } > > +static void kvm_pv_guest_cpu_reboot(void *unused) > +{ > + if (kvm_para_has_feature(KVM_FEATURE_EOI)) > + wrmsrl(MSR_KVM_EOI_EN, MSR_KVM_EOI_DISABLED); Shouldn't it zero apic_eoi? > + kvm_pv_disable_apf(); > +} > + > static int kvm_pv_reboot_notify(struct notifier_block *nb, > unsigned long code, void *unused) > { > if (code == SYS_RESTART) > - on_each_cpu(kvm_pv_disable_apf, NULL, 1); > + on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); > return NOTIFY_DONE; > } > > @@ -378,7 +420,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) > static void kvm_guest_cpu_offline(void *dummy) > { > kvm_disable_steal_time(); > - kvm_pv_disable_apf(NULL); > + if (kvm_para_has_feature(KVM_FEATURE_EOI)) > + wrmsrl(MSR_KVM_EOI_EN, MSR_KVM_EOI_DISABLED); Same. > + kvm_pv_disable_apf(); > apf_task_wake_all(); > } > > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c > index 9fed5be..7d00d2d 100644 > --- a/arch/x86/kvm/cpuid.c > +++ b/arch/x86/kvm/cpuid.c > @@ -408,6 +408,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, > (1 << KVM_FEATURE_NOP_IO_DELAY) | > (1 << KVM_FEATURE_CLOCKSOURCE2) | > (1 << KVM_FEATURE_ASYNC_PF) | > + (1 << KVM_FEATURE_EOI) | > (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT); > > if (sched_info_on()) > diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c > index 7e06ba1..07bdfab 100644 > --- a/arch/x86/kvm/irq.c > +++ b/arch/x86/kvm/irq.c > @@ -48,7 +48,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) > if (!irqchip_in_kernel(v->kvm)) > return v->arch.interrupt.pending; > > - if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ > + if (kvm_apic_has_interrupt(v) < 0) { /* LAPIC */ > if (kvm_apic_accept_pic_intr(v)) { > s = pic_irqchip(v->kvm); /* PIC */ > return s->output; > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 992b4ea..c2118ab 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -270,6 +270,54 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) > irq->level, irq->trig_mode); > } > > +static int eoi_put_user(struct kvm_vcpu *vcpu, u8 val) > +{ > + > + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val, > + sizeof(val)); > +} > + > +static int eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) > +{ > + > + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val, > + sizeof(*val)); > +} > + > +static inline bool eoi_enabled(struct kvm_vcpu *vcpu) > +{ > + return vcpu->arch.eoi.msr_val != MSR_KVM_EOI_DISABLED; > +} > + > +static bool eoi_get_pending(struct kvm_vcpu *vcpu) > +{ > + u8 val; > + if (eoi_get_user(vcpu, &val) < 0) > + apic_debug("Can't read EOI MSR value: 0x%llx\n", > + (unsigned long long)vcpi->arch.eoi.msr_val); > + return val & 0x1; > +} > + > +static void eoi_set_pending(struct kvm_vcpu *vcpu) > +{ > + if (eoi_put_user(vcpu, 0x1) < 0) { > + apic_debug("Can't set EOI MSR value: 0x%llx\n", > + (unsigned long long)vcpi->arch.eoi.msr_val); > + return; > + } > + vcpu->arch.eoi.pending = true; > +} > + > +static void eoi_clr_pending(struct kvm_vcpu *vcpu) > +{ > + if (eoi_put_user(vcpu, 0x0) < 0) { > + apic_debug("Can't clear EOI MSR value: 0x%llx\n", > + (unsigned long long)vcpi->arch.eoi.msr_val); > + return; > + } > + vcpu->arch.eoi.pending = false; > +} > + > static inline int apic_find_highest_isr(struct kvm_lapic *apic) > { > int result; > @@ -280,6 +328,20 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) > return result; > } > > +static void apic_update_isr(struct kvm_lapic *apic) > +{ > + int vector; > + if (!eoi_enabled(apic->vcpu) || > + !apic->vcpu->arch.eoi.pending || > + eoi_get_pending(apic->vcpu)) > + return; > + apic->vcpu->arch.eoi.pending = false; > + vector = apic_find_highest_isr(apic); > + if (vector == -1) > + return; > + apic_clear_vector(vector, apic->regs + APIC_ISR); > +} > + We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending(). This removes the need for the function and its calls. We already have call to kvm_lapic_sync_from_vapic() on exit path which should be extended to do the above. > static void apic_update_ppr(struct kvm_lapic *apic) > { > u32 tpr, isrv, ppr, old_ppr; > @@ -287,6 +349,7 @@ static void apic_update_ppr(struct kvm_lapic *apic) > > old_ppr = apic_get_reg(apic, APIC_PROCPRI); > tpr = apic_get_reg(apic, APIC_TASKPRI); > + apic_update_isr(apic); > isr = apic_find_highest_isr(apic); > isrv = (isr != -1) ? isr : 0; > > @@ -492,6 +555,8 @@ static void apic_set_eoi(struct kvm_lapic *apic) > if (vector == -1) > return; > > + if (eoi_enabled(apic->vcpu)) > + eoi_clr_pending(apic->vcpu); You have many of those. Just fold eoi_enabled() check into eoi_clr_pending(). > apic_clear_vector(vector, apic->regs + APIC_ISR); > apic_update_ppr(apic); > > @@ -1085,6 +1150,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) > atomic_set(&apic->lapic_timer.pending, 0); > if (kvm_vcpu_is_bsp(vcpu)) > vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; > + vcpu->arch.eoi.msr_val = MSR_KVM_EOI_DISABLED; > + vcpu->arch.eoi.pending = false; > apic_update_ppr(apic); > > vcpu->arch.apic_arb_prio = 0; > @@ -1210,9 +1277,10 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) > > apic_update_ppr(apic); > highest_irr = apic_find_highest_irr(apic); > - if ((highest_irr == -1) || > - ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) > + if (highest_irr == -1) > return -1; > + if (((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI))) > + return -2; > return highest_irr; > } > > @@ -1244,9 +1312,20 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) > int vector = kvm_apic_has_interrupt(vcpu); > struct kvm_lapic *apic = vcpu->arch.apic; > > - if (vector == -1) > + /* Detect interrupt nesting and disable EOI optimization */ > + if (eoi_enabled(vcpu) && vector == -2) > + eoi_clr_pending(vcpu); > + > + if (vector < 0) > return -1; > > + if (eoi_enabled(vcpu)) { > + if (kvm_ioapic_handles_vector(vcpu->kvm, vector)) > + eoi_clr_pending(vcpu); > + else > + eoi_set_pending(vcpu); > + } > + > apic_set_vector(vector, apic->regs + APIC_ISR); > apic_update_ppr(apic); > apic_clear_irr(vector, apic); > @@ -1261,6 +1340,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) > MSR_IA32_APICBASE_BASE; > kvm_apic_set_version(vcpu); > > + if (eoi_enabled(apic->vcpu)) > + apic->vcpu->arch.eoi.pending = eoi_get_pending(apic->vcpu); > apic_update_ppr(apic); > hrtimer_cancel(&apic->lapic_timer.timer); > update_divide_count(apic); > @@ -1392,3 +1473,18 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) > > return 0; > } > + > +int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data) > +{ > + if (data == MSR_KVM_EOI_DISABLED) { > + struct kvm_lapic *apic = vcpu->arch.apic; > + if (apic && apic_enabled(apic)) > + apic_update_isr(apic); > + } else if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data, > + data)) > + return 1; > + > + vcpu->arch.eoi.msr_val = data; > + vcpu->arch.eoi.pending = false; > + return 0; > +} > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > index 6f4ce25..042dace 100644 > --- a/arch/x86/kvm/lapic.h > +++ b/arch/x86/kvm/lapic.h > @@ -60,4 +60,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu) > { > return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE; > } > + > +int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data); > #endif > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 4044ce0..31d6762 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -1627,6 +1627,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) > if (kvm_pv_enable_async_pf(vcpu, data)) > return 1; > break; > + case MSR_KVM_EOI_EN: > + if (kvm_pv_enable_apic_eoi(vcpu, data)) > + return 1; > + break; > case MSR_KVM_STEAL_TIME: > > if (unlikely(!sched_info_on())) > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 10:08 ` Gleb Natapov @ 2012-04-16 11:09 ` Michael S. Tsirkin 2012-04-16 11:24 ` Gleb Natapov 2012-04-17 9:22 ` Avi Kivity 1 sibling, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-16 11:09 UTC (permalink / raw) To: Gleb Natapov; +Cc: kvm Thanks very much for the review. I'll address the comments. Some questions on your comments below. On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote: > > @@ -37,6 +38,8 @@ > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > +#define MSR_KVM_EOI_EN 0x4b564d04 > > +#define MSR_KVM_EOI_DISABLED 0x0L > This is valid gpa. Follow others MSR example i.e align the address to, > lets say dword, and use lsb as enable bit. We only need a single byte, since this is per-CPU - it's better to save the memory, so no alignment is required. An explicit disable msr would also address this, right? > > +static void apic_update_isr(struct kvm_lapic *apic) > > +{ > > + int vector; > > + if (!eoi_enabled(apic->vcpu) || > > + !apic->vcpu->arch.eoi.pending || > > + eoi_get_pending(apic->vcpu)) > > + return; > > + apic->vcpu->arch.eoi.pending = false; > > + vector = apic_find_highest_isr(apic); > > + if (vector == -1) > > + return; > > + apic_clear_vector(vector, apic->regs + APIC_ISR); > > +} > > + > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending(). > This removes the need for the function and its calls. It's a bit of a waste: that one does all kind extra things which we know we don't need, some of the atomics. And it's datapath so extra stuff is not free. Probably a good idea to replace the call on MSR disable - I think apic_update_ppr is a better thing to call there. Is there anything else that I missed? > We already have > call to kvm_lapic_sync_from_vapic() on exit path which should be > extended to do the above. It already does this. It calls apic_set_tpr which calls apic_update_ppr which calls apic_update_isr. -- MST ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 11:09 ` Michael S. Tsirkin @ 2012-04-16 11:24 ` Gleb Natapov 2012-04-16 12:18 ` Michael S. Tsirkin 0 siblings, 1 reply; 32+ messages in thread From: Gleb Natapov @ 2012-04-16 11:24 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote: > Thanks very much for the review. I'll address the comments. > Some questions on your comments below. > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote: > > > @@ -37,6 +38,8 @@ > > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > > +#define MSR_KVM_EOI_EN 0x4b564d04 > > > +#define MSR_KVM_EOI_DISABLED 0x0L > > This is valid gpa. Follow others MSR example i.e align the address to, > > lets say dword, and use lsb as enable bit. > > We only need a single byte, since this is per-CPU - > it's better to save the memory, so no alignment is required. > An explicit disable msr would also address this, right? > We do not have shortage of memory. Better make all MSRs works the same way. BTW have you added new MSR to msrs_to_save array? I forgot to checked. > > > +static void apic_update_isr(struct kvm_lapic *apic) > > > +{ > > > + int vector; > > > + if (!eoi_enabled(apic->vcpu) || > > > + !apic->vcpu->arch.eoi.pending || > > > + eoi_get_pending(apic->vcpu)) > > > + return; > > > + apic->vcpu->arch.eoi.pending = false; > > > + vector = apic_find_highest_isr(apic); > > > + if (vector == -1) > > > + return; > > > + apic_clear_vector(vector, apic->regs + APIC_ISR); > > > +} > > > + > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending(). > > This removes the need for the function and its calls. > > It's a bit of a waste: that one does all kind extra things > which we know we don't need, some of the atomics. And it's datapath > so extra stuff is not free. > How much time those extra things are taking compared to vmexit you already serving? And there is a good chance you will do them during vmentry anyway while trying to inject (or just check for) new interrupt. > Probably a good idea to replace the call on MSR disable - I think > apic_update_ppr is a better thing to call there. > > Is there anything else that I missed? I think that simple things are better then complex things if the end result is the same :) Try it and see how much simpler it is. Have you measured that what you are trying to optimize actually worth optimizing? That you can measure the optimization at all? > > > We already have > > call to kvm_lapic_sync_from_vapic() on exit path which should be > > extended to do the above. > > It already does this. It calls apic_set_tpr > which calls apic_update_ppr which calls > apic_update_isr. > It does it only if vapic is in use (and it is usually not). But the if() is already there so we do not need to worry that one additional if() on the exit path will slow KVM to the crawl. -- Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 11:24 ` Gleb Natapov @ 2012-04-16 12:18 ` Michael S. Tsirkin 2012-04-16 12:30 ` Gleb Natapov 2012-04-17 9:24 ` Avi Kivity 0 siblings, 2 replies; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-16 12:18 UTC (permalink / raw) To: Gleb Natapov; +Cc: kvm On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote: > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote: > > Thanks very much for the review. I'll address the comments. > > Some questions on your comments below. > > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote: > > > > @@ -37,6 +38,8 @@ > > > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > > > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > > > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > > > +#define MSR_KVM_EOI_EN 0x4b564d04 > > > > +#define MSR_KVM_EOI_DISABLED 0x0L > > > This is valid gpa. Follow others MSR example i.e align the address to, > > > lets say dword, and use lsb as enable bit. > > > > We only need a single byte, since this is per-CPU - > > it's better to save the memory, so no alignment is required. > > An explicit disable msr would also address this, right? > > > We do not have shortage of memory. > Better make all MSRs works the same > way. I agree it's nice to have EOI and ASYNC_PF look similar but wasting memory is also bad. I'll ponder this some more. > BTW have you added new MSR to msrs_to_save array? I forgot to > checked. I didn't yet. Trying to understand how will that affect cross-version migration - any input? > > > > +static void apic_update_isr(struct kvm_lapic *apic) > > > > +{ > > > > + int vector; > > > > + if (!eoi_enabled(apic->vcpu) || > > > > + !apic->vcpu->arch.eoi.pending || > > > > + eoi_get_pending(apic->vcpu)) > > > > + return; > > > > + apic->vcpu->arch.eoi.pending = false; > > > > + vector = apic_find_highest_isr(apic); > > > > + if (vector == -1) > > > > + return; > > > > + apic_clear_vector(vector, apic->regs + APIC_ISR); > > > > +} > > > > + > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending(). > > > This removes the need for the function and its calls. > > > > It's a bit of a waste: that one does all kind extra things > > which we know we don't need, some of the atomics. And it's datapath > > so extra stuff is not free. > > > How much time those extra things are taking compared to vmexit you > already serving? And there is a good chance you will do them during > vmentry anyway while trying to inject (or just check for) new interrupt. No need to do them twice :) > > Probably a good idea to replace the call on MSR disable - I think > > apic_update_ppr is a better thing to call there. > > > > Is there anything else that I missed? > I think that simple things are better then complex things if the end result is > the same :) Try it and see how much simpler it is. It doesn't seem to be simpler at all. The common functionality is about 4 lines. > Have you measured > that what you are trying to optimize actually worth optimizing? That you > can measure the optimization at all? The claim is not that it's measureable. The claim is that it does not scale to keep adding things to do on each entry. > > > > > We already have > > > call to kvm_lapic_sync_from_vapic() on exit path which should be > > > extended to do the above. > > > > It already does this. It calls apic_set_tpr > > which calls apic_update_ppr which calls > > apic_update_isr. > > > It does it only if vapic is in use (and it is usually not). When it's not we don't need to update ppr and so no need to update isr on this exit. > But the if() > is already there so we do not need to worry that one additional if() on > the exit path will slow KVM to the crawl. The number of things we need to do on each entry keeps going up, if we just keep adding stuff it won't end well. > -- > Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 12:18 ` Michael S. Tsirkin @ 2012-04-16 12:30 ` Gleb Natapov 2012-04-16 13:13 ` Michael S. Tsirkin 2012-04-17 9:24 ` Avi Kivity 1 sibling, 1 reply; 32+ messages in thread From: Gleb Natapov @ 2012-04-16 12:30 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On Mon, Apr 16, 2012 at 03:18:25PM +0300, Michael S. Tsirkin wrote: > On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote: > > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote: > > > Thanks very much for the review. I'll address the comments. > > > Some questions on your comments below. > > > > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote: > > > > > @@ -37,6 +38,8 @@ > > > > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > > > > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > > > > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > > > > +#define MSR_KVM_EOI_EN 0x4b564d04 > > > > > +#define MSR_KVM_EOI_DISABLED 0x0L > > > > This is valid gpa. Follow others MSR example i.e align the address to, > > > > lets say dword, and use lsb as enable bit. > > > > > > We only need a single byte, since this is per-CPU - > > > it's better to save the memory, so no alignment is required. > > > An explicit disable msr would also address this, right? > > > > > We do not have shortage of memory. > > Better make all MSRs works the same > > way. > > I agree it's nice to have EOI and ASYNC_PF look similar > but wasting memory is also bad. I'll ponder this some more. > Steal time and kvm clock too and may be others (if anything left at all). I hope you are kidding about wasting of 4 bytes per vcpu. > > BTW have you added new MSR to msrs_to_save array? I forgot to > > checked. > > I didn't yet. Trying to understand how will that affect > cross-version migration - any input? > Not sure. You need to check what userspace does with them. > > > > > +static void apic_update_isr(struct kvm_lapic *apic) > > > > > +{ > > > > > + int vector; > > > > > + if (!eoi_enabled(apic->vcpu) || > > > > > + !apic->vcpu->arch.eoi.pending || > > > > > + eoi_get_pending(apic->vcpu)) > > > > > + return; > > > > > + apic->vcpu->arch.eoi.pending = false; > > > > > + vector = apic_find_highest_isr(apic); > > > > > + if (vector == -1) > > > > > + return; > > > > > + apic_clear_vector(vector, apic->regs + APIC_ISR); > > > > > +} > > > > > + > > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending(). > > > > This removes the need for the function and its calls. > > > > > > It's a bit of a waste: that one does all kind extra things > > > which we know we don't need, some of the atomics. And it's datapath > > > so extra stuff is not free. > > > > > How much time those extra things are taking compared to vmexit you > > already serving? And there is a good chance you will do them during > > vmentry anyway while trying to inject (or just check for) new interrupt. > > No need to do them twice :) > > > > Probably a good idea to replace the call on MSR disable - I think > > > apic_update_ppr is a better thing to call there. > > > > > > Is there anything else that I missed? > > I think that simple things are better then complex things if the end result is > > the same :) Try it and see how much simpler it is. > > It doesn't seem to be simpler at all. The common functionality is > about 4 lines. Send patch for us to see. lapic changes should be minimal. > > > Have you measured > > that what you are trying to optimize actually worth optimizing? That you > > can measure the optimization at all? > > The claim is not that it's measureable. The claim is that > it does not scale to keep adding things to do on each entry. > Only if there is something to do. "Premature optimization is the root of all evil". The PV eoi is about not exiting on eoi unnecessary. You are mixing this with trying to avoid calling eoi code for given interrupt at all. Two different optimization, do not try lump them together. > > > > > > > We already have > > > > call to kvm_lapic_sync_from_vapic() on exit path which should be > > > > extended to do the above. > > > > > > It already does this. It calls apic_set_tpr > > > which calls apic_update_ppr which calls > > > apic_update_isr. > > > > > It does it only if vapic is in use (and it is usually not). > > When it's not we don't need to update ppr and so > no need to update isr on this exit. If there was eoi we need to update both. > > > But the if() > > is already there so we do not need to worry that one additional if() on > > the exit path will slow KVM to the crawl. > > The number of things we need to do on each entry keeps going up, if we > just keep adding stuff it won't end well. > You do not add stuff. The if() is already there. -- Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 12:30 ` Gleb Natapov @ 2012-04-16 13:13 ` Michael S. Tsirkin 2012-04-16 15:10 ` Gleb Natapov 0 siblings, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-16 13:13 UTC (permalink / raw) To: Gleb Natapov; +Cc: kvm On Mon, Apr 16, 2012 at 03:30:47PM +0300, Gleb Natapov wrote: > On Mon, Apr 16, 2012 at 03:18:25PM +0300, Michael S. Tsirkin wrote: > > On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote: > > > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote: > > > > Thanks very much for the review. I'll address the comments. > > > > Some questions on your comments below. > > > > > > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote: > > > > > > @@ -37,6 +38,8 @@ > > > > > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > > > > > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > > > > > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > > > > > +#define MSR_KVM_EOI_EN 0x4b564d04 > > > > > > +#define MSR_KVM_EOI_DISABLED 0x0L > > > > > This is valid gpa. Follow others MSR example i.e align the address to, > > > > > lets say dword, and use lsb as enable bit. > > > > > > > > We only need a single byte, since this is per-CPU - > > > > it's better to save the memory, so no alignment is required. > > > > An explicit disable msr would also address this, right? > > > > > > > We do not have shortage of memory. > > > Better make all MSRs works the same > > > way. > > > > I agree it's nice to have EOI and ASYNC_PF look similar > > but wasting memory is also bad. I'll ponder this some more. > > > Steal time and kvm clock too and may be others (if anything left at > all). I hope you are kidding about wasting of 4 bytes per vcpu. Not vcpu - cpu. It's wasted whenever kernel/kvm.c is built so it has cost on physical machines as well. > > > BTW have you added new MSR to msrs_to_save array? I forgot to > > > checked. > > > > I didn't yet. Trying to understand how will that affect > > cross-version migration - any input? > > > Not sure. You need to check what userspace does with them. > > > > > > > +static void apic_update_isr(struct kvm_lapic *apic) > > > > > > +{ > > > > > > + int vector; > > > > > > + if (!eoi_enabled(apic->vcpu) || > > > > > > + !apic->vcpu->arch.eoi.pending || > > > > > > + eoi_get_pending(apic->vcpu)) > > > > > > + return; > > > > > > + apic->vcpu->arch.eoi.pending = false; > > > > > > + vector = apic_find_highest_isr(apic); > > > > > > + if (vector == -1) > > > > > > + return; > > > > > > + apic_clear_vector(vector, apic->regs + APIC_ISR); > > > > > > +} > > > > > > + > > > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending(). > > > > > This removes the need for the function and its calls. > > > > > > > > It's a bit of a waste: that one does all kind extra things > > > > which we know we don't need, some of the atomics. And it's datapath > > > > so extra stuff is not free. > > > > > > > How much time those extra things are taking compared to vmexit you > > > already serving? And there is a good chance you will do them during > > > vmentry anyway while trying to inject (or just check for) new interrupt. > > > > No need to do them twice :) > > > > > > Probably a good idea to replace the call on MSR disable - I think > > > > apic_update_ppr is a better thing to call there. > > > > > > > > Is there anything else that I missed? > > > I think that simple things are better then complex things if the end result is > > > the same :) Try it and see how much simpler it is. > > > > It doesn't seem to be simpler at all. The common functionality is > > about 4 lines. > Send patch for us to see. That's what you are replying to, no? You can see that it is 4 lines of code. > lapic changes should be minimal. Exactly my motivation. > > > > > Have you measured > > > that what you are trying to optimize actually worth optimizing? That you > > > can measure the optimization at all? > > > > The claim is not that it's measureable. The claim is that > > it does not scale to keep adding things to do on each entry. > > > Only if there is something to do. "Premature optimization is the root of > all evil". The PV eoi is about not exiting on eoi unnecessary. You are > mixing this with trying to avoid calling eoi code for given interrupt at > all. I don't think this is what my patch does. EOI still clears ISR for each interrupt. > Two different optimization, do not try lump them together. > > > > > > > > > We already have > > > > > call to kvm_lapic_sync_from_vapic() on exit path which should be > > > > > extended to do the above. > > > > > > > > It already does this. It calls apic_set_tpr > > > > which calls apic_update_ppr which calls > > > > apic_update_isr. > > > > > > > It does it only if vapic is in use (and it is usually not). > > > > When it's not we don't need to update ppr and so > > no need to update isr on this exit. > If there was eoi we need to update both. By same logic we should call update_ppr on each entry. The overhead is unlikely to be measureable either :). > > > > > But the if() > > > is already there so we do not need to worry that one additional if() on > > > the exit path will slow KVM to the crawl. > > > > The number of things we need to do on each entry keeps going up, if we > > just keep adding stuff it won't end well. > > > You do not add stuff. The if() is already there. Your proposal was to check userspace eoi record each time when eoi is pending, no? This would certainly add some overhead. I also find the logic easier to follow as is - it is contained in lapic.c without relying on being called from x86.c as just the right moment. > -- > Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 13:13 ` Michael S. Tsirkin @ 2012-04-16 15:10 ` Gleb Natapov 2012-04-16 16:33 ` Michael S. Tsirkin 2012-04-16 17:24 ` Michael S. Tsirkin 0 siblings, 2 replies; 32+ messages in thread From: Gleb Natapov @ 2012-04-16 15:10 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On Mon, Apr 16, 2012 at 04:13:29PM +0300, Michael S. Tsirkin wrote: > On Mon, Apr 16, 2012 at 03:30:47PM +0300, Gleb Natapov wrote: > > On Mon, Apr 16, 2012 at 03:18:25PM +0300, Michael S. Tsirkin wrote: > > > On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote: > > > > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote: > > > > > Thanks very much for the review. I'll address the comments. > > > > > Some questions on your comments below. > > > > > > > > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote: > > > > > > > @@ -37,6 +38,8 @@ > > > > > > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > > > > > > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > > > > > > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > > > > > > +#define MSR_KVM_EOI_EN 0x4b564d04 > > > > > > > +#define MSR_KVM_EOI_DISABLED 0x0L > > > > > > This is valid gpa. Follow others MSR example i.e align the address to, > > > > > > lets say dword, and use lsb as enable bit. > > > > > > > > > > We only need a single byte, since this is per-CPU - > > > > > it's better to save the memory, so no alignment is required. > > > > > An explicit disable msr would also address this, right? > > > > > > > > > We do not have shortage of memory. > > > > Better make all MSRs works the same > > > > way. > > > > > > I agree it's nice to have EOI and ASYNC_PF look similar > > > but wasting memory is also bad. I'll ponder this some more. > > > > > Steal time and kvm clock too and may be others (if anything left at > > all). I hope you are kidding about wasting of 4 bytes per vcpu. > > Not vcpu - cpu. It's wasted whenever kernel/kvm.c is built so it has > cost on physical machines as well. > There are less real cpus than vcpus usually :) > > > > BTW have you added new MSR to msrs_to_save array? I forgot to > > > > checked. > > > > > > I didn't yet. Trying to understand how will that affect > > > cross-version migration - any input? > > > > > Not sure. You need to check what userspace does with them. > > > > > > > > > +static void apic_update_isr(struct kvm_lapic *apic) > > > > > > > +{ > > > > > > > + int vector; > > > > > > > + if (!eoi_enabled(apic->vcpu) || > > > > > > > + !apic->vcpu->arch.eoi.pending || > > > > > > > + eoi_get_pending(apic->vcpu)) > > > > > > > + return; > > > > > > > + apic->vcpu->arch.eoi.pending = false; > > > > > > > + vector = apic_find_highest_isr(apic); > > > > > > > + if (vector == -1) > > > > > > > + return; > > > > > > > + apic_clear_vector(vector, apic->regs + APIC_ISR); > > > > > > > +} > > > > > > > + > > > > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending(). > > > > > > This removes the need for the function and its calls. > > > > > > > > > > It's a bit of a waste: that one does all kind extra things > > > > > which we know we don't need, some of the atomics. And it's datapath > > > > > so extra stuff is not free. > > > > > > > > > How much time those extra things are taking compared to vmexit you > > > > already serving? And there is a good chance you will do them during > > > > vmentry anyway while trying to inject (or just check for) new interrupt. > > > > > > No need to do them twice :) > > > > > > > > Probably a good idea to replace the call on MSR disable - I think > > > > > apic_update_ppr is a better thing to call there. > > > > > > > > > > Is there anything else that I missed? > > > > I think that simple things are better then complex things if the end result is > > > > the same :) Try it and see how much simpler it is. > > > > > > It doesn't seem to be simpler at all. The common functionality is > > > about 4 lines. > > Send patch for us to see. > > That's what you are replying to, no? > You can see that it is 4 lines of code. No. I mean something like patch below. Applies on top of yours. Did not check that it works or even compiles. > > > lapic changes should be minimal. > > Exactly my motivation. > My patch removes 13 lines more :) > > > > > > > Have you measured > > > > that what you are trying to optimize actually worth optimizing? That you > > > > can measure the optimization at all? > > > > > > The claim is not that it's measureable. The claim is that > > > it does not scale to keep adding things to do on each entry. > > > > > Only if there is something to do. "Premature optimization is the root of > > all evil". The PV eoi is about not exiting on eoi unnecessary. You are > > mixing this with trying to avoid calling eoi code for given interrupt at > > all. > > I don't think this is what my patch does. EOI still clears ISR > for each interrupt. > > > Two different optimization, do not try lump them together. > > > > > > > > > > > We already have > > > > > > call to kvm_lapic_sync_from_vapic() on exit path which should be > > > > > > extended to do the above. > > > > > > > > > > It already does this. It calls apic_set_tpr > > > > > which calls apic_update_ppr which calls > > > > > apic_update_isr. > > > > > > > > > It does it only if vapic is in use (and it is usually not). > > > > > > When it's not we don't need to update ppr and so > > > no need to update isr on this exit. > > If there was eoi we need to update both. > > By same logic we should call update_ppr on each entry. > The overhead is unlikely to be measureable either :). > It is small enough for us to not care about it on RHEL6 where it is called on each entry. > > > > > > > But the if() > > > > is already there so we do not need to worry that one additional if() on > > > > the exit path will slow KVM to the crawl. > > > > > > The number of things we need to do on each entry keeps going up, if we > > > just keep adding stuff it won't end well. > > > > > You do not add stuff. The if() is already there. > > > Your proposal was to check userspace eoi record > each time when eoi is pending, no? Yes. > This would certainly add some overhead. > Only when eoi is pending. This is rare. > I also find the logic easier to follow as is - > it is contained in lapic.c without relying > on being called from x86.c as just the right moment. > See the patch. It change nothing outside of lapic.c. diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d184a41..8fb5eca 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -323,20 +323,6 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic) return result; } -static void apic_update_isr(struct kvm_lapic *apic) -{ - int vector; - if (!eoi_enabled(apic->vcpu) || - !apic->vcpu->arch.eoi.pending || - eoi_get_pending(apic->vcpu)) - return; - apic->vcpu->arch.eoi.pending = false; - vector = apic_find_highest_isr(apic); - if (vector == -1) - return; - apic_clear_vector(vector, apic->regs + APIC_ISR); -} - static void apic_update_ppr(struct kvm_lapic *apic) { u32 tpr, isrv, ppr, old_ppr; @@ -344,7 +330,6 @@ static void apic_update_ppr(struct kvm_lapic *apic) old_ppr = apic_get_reg(apic, APIC_PROCPRI); tpr = apic_get_reg(apic, APIC_TASKPRI); - apic_update_isr(apic); isr = apic_find_highest_isr(apic); isrv = (isr != -1) ? isr : 0; @@ -1361,14 +1346,19 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) u32 data; void *vapic; - if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr) + if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr || + !apic->vcpu->arch.eoi.pending) return; - vapic = kmap_atomic(vcpu->arch.apic->vapic_page); - data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)); - kunmap_atomic(vapic); + if (apic->vcpu->arch.eoi.pending && !eoi_get_pending(apic->vcpu)) { + apic_set_eoi(apic); + } else { + vapic = kmap_atomic(vcpu->arch.apic->vapic_page); + data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)); + kunmap_atomic(vapic); - apic_set_tpr(vcpu->arch.apic, data & 0xff); + apic_set_tpr(vcpu->arch.apic, data & 0xff); + } } void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) @@ -1469,13 +1459,10 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data) int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data) { - if (data == MSR_KVM_EOI_DISABLED) { - struct kvm_lapic *apic = vcpu->arch.apic; - if (apic && apic_enabled(apic)) - apic_update_isr(apic); - } else if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data, - data)) - return 1; + if (data != MSR_KVM_EOI_DISABLED) + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data, + data)) + return 1; vcpu->arch.eoi.msr_val = data; vcpu->arch.eoi.pending = false; -- Gleb. ^ permalink raw reply related [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 15:10 ` Gleb Natapov @ 2012-04-16 16:33 ` Michael S. Tsirkin 2012-04-16 17:51 ` Gleb Natapov 2012-04-16 17:24 ` Michael S. Tsirkin 1 sibling, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-16 16:33 UTC (permalink / raw) To: Gleb Natapov; +Cc: kvm On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote: > On Mon, Apr 16, 2012 at 04:13:29PM +0300, Michael S. Tsirkin wrote: > > On Mon, Apr 16, 2012 at 03:30:47PM +0300, Gleb Natapov wrote: > > > On Mon, Apr 16, 2012 at 03:18:25PM +0300, Michael S. Tsirkin wrote: > > > > On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote: > > > > > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote: > > > > > > Thanks very much for the review. I'll address the comments. > > > > > > Some questions on your comments below. > > > > > > > > > > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote: > > > > > > > > @@ -37,6 +38,8 @@ > > > > > > > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > > > > > > > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > > > > > > > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > > > > > > > +#define MSR_KVM_EOI_EN 0x4b564d04 > > > > > > > > +#define MSR_KVM_EOI_DISABLED 0x0L > > > > > > > This is valid gpa. Follow others MSR example i.e align the address to, > > > > > > > lets say dword, and use lsb as enable bit. > > > > > > > > > > > > We only need a single byte, since this is per-CPU - > > > > > > it's better to save the memory, so no alignment is required. > > > > > > An explicit disable msr would also address this, right? > > > > > > > > > > > We do not have shortage of memory. > > > > > Better make all MSRs works the same > > > > > way. > > > > > > > > I agree it's nice to have EOI and ASYNC_PF look similar > > > > but wasting memory is also bad. I'll ponder this some more. > > > > > > > Steal time and kvm clock too and may be others (if anything left at > > > all). I hope you are kidding about wasting of 4 bytes per vcpu. > > > > Not vcpu - cpu. It's wasted whenever kernel/kvm.c is built so it has > > cost on physical machines as well. > > > There are less real cpus than vcpus usually :) I'm adding this percpu always. This makes it cheap to access but it means it is allocated on physical cpus - just unused there. > > > > > BTW have you added new MSR to msrs_to_save array? I forgot to > > > > > checked. > > > > > > > > I didn't yet. Trying to understand how will that affect > > > > cross-version migration - any input? > > > > > > > Not sure. You need to check what userspace does with them. > > > > > > > > > > > +static void apic_update_isr(struct kvm_lapic *apic) > > > > > > > > +{ > > > > > > > > + int vector; > > > > > > > > + if (!eoi_enabled(apic->vcpu) || > > > > > > > > + !apic->vcpu->arch.eoi.pending || > > > > > > > > + eoi_get_pending(apic->vcpu)) > > > > > > > > + return; > > > > > > > > + apic->vcpu->arch.eoi.pending = false; > > > > > > > > + vector = apic_find_highest_isr(apic); > > > > > > > > + if (vector == -1) > > > > > > > > + return; > > > > > > > > + apic_clear_vector(vector, apic->regs + APIC_ISR); > > > > > > > > +} > > > > > > > > + > > > > > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending(). > > > > > > > This removes the need for the function and its calls. > > > > > > > > > > > > It's a bit of a waste: that one does all kind extra things > > > > > > which we know we don't need, some of the atomics. And it's datapath > > > > > > so extra stuff is not free. > > > > > > > > > > > How much time those extra things are taking compared to vmexit you > > > > > already serving? And there is a good chance you will do them during > > > > > vmentry anyway while trying to inject (or just check for) new interrupt. > > > > > > > > No need to do them twice :) > > > > > > > > > > Probably a good idea to replace the call on MSR disable - I think > > > > > > apic_update_ppr is a better thing to call there. > > > > > > > > > > > > Is there anything else that I missed? > > > > > I think that simple things are better then complex things if the end result is > > > > > the same :) Try it and see how much simpler it is. > > > > > > > > It doesn't seem to be simpler at all. The common functionality is > > > > about 4 lines. > > > Send patch for us to see. > > > > That's what you are replying to, no? > > You can see that it is 4 lines of code. > No. I mean something like patch below. Applies on top of yours. Did not > check that it works or even compiles. > > > > > > lapic changes should be minimal. > > > > Exactly my motivation. > > > My patch removes 13 lines more :) I'll take a look, thanks. > > > > > > > > > Have you measured > > > > > that what you are trying to optimize actually worth optimizing? That you > > > > > can measure the optimization at all? > > > > > > > > The claim is not that it's measureable. The claim is that > > > > it does not scale to keep adding things to do on each entry. > > > > > > > Only if there is something to do. "Premature optimization is the root of > > > all evil". The PV eoi is about not exiting on eoi unnecessary. You are > > > mixing this with trying to avoid calling eoi code for given interrupt at > > > all. > > > > I don't think this is what my patch does. EOI still clears ISR > > for each interrupt. > > > > > Two different optimization, do not try lump them together. > > > > > > > > > > > > > We already have > > > > > > > call to kvm_lapic_sync_from_vapic() on exit path which should be > > > > > > > extended to do the above. > > > > > > > > > > > > It already does this. It calls apic_set_tpr > > > > > > which calls apic_update_ppr which calls > > > > > > apic_update_isr. > > > > > > > > > > > It does it only if vapic is in use (and it is usually not). > > > > > > > > When it's not we don't need to update ppr and so > > > > no need to update isr on this exit. > > > If there was eoi we need to update both. > > > > By same logic we should call update_ppr on each entry. > > The overhead is unlikely to be measureable either :). > > > It is small enough for us to not care about it on RHEL6 where it is > called on each entry. Exactly. So why do not we do it? > > > > > > > > > But the if() > > > > > is already there so we do not need to worry that one additional if() on > > > > > the exit path will slow KVM to the crawl. > > > > > > > > The number of things we need to do on each entry keeps going up, if we > > > > just keep adding stuff it won't end well. > > > > > > > You do not add stuff. The if() is already there. > > > > > > Your proposal was to check userspace eoi record > > each time when eoi is pending, no? > Yes. > > > This would certainly add some overhead. > > > Only when eoi is pending. This is rare. This is exactly while guest handles an interrupt. It's not all that rare at all: e.g. device drivers cause an exit from interrupt handler by doing io. > > I also find the logic easier to follow as is - > > it is contained in lapic.c without relying > > on being called from x86.c as just the right moment. > > > See the patch. It change nothing outside of lapic.c. I'll take a look, thanks. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 16:33 ` Michael S. Tsirkin @ 2012-04-16 17:51 ` Gleb Natapov 2012-04-16 19:01 ` Michael S. Tsirkin 0 siblings, 1 reply; 32+ messages in thread From: Gleb Natapov @ 2012-04-16 17:51 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On Mon, Apr 16, 2012 at 07:33:28PM +0300, Michael S. Tsirkin wrote: > On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote: > > On Mon, Apr 16, 2012 at 04:13:29PM +0300, Michael S. Tsirkin wrote: > > > On Mon, Apr 16, 2012 at 03:30:47PM +0300, Gleb Natapov wrote: > > > > On Mon, Apr 16, 2012 at 03:18:25PM +0300, Michael S. Tsirkin wrote: > > > > > On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote: > > > > > > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote: > > > > > > > Thanks very much for the review. I'll address the comments. > > > > > > > Some questions on your comments below. > > > > > > > > > > > > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote: > > > > > > > > > @@ -37,6 +38,8 @@ > > > > > > > > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > > > > > > > > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > > > > > > > > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > > > > > > > > +#define MSR_KVM_EOI_EN 0x4b564d04 > > > > > > > > > +#define MSR_KVM_EOI_DISABLED 0x0L > > > > > > > > This is valid gpa. Follow others MSR example i.e align the address to, > > > > > > > > lets say dword, and use lsb as enable bit. > > > > > > > > > > > > > > We only need a single byte, since this is per-CPU - > > > > > > > it's better to save the memory, so no alignment is required. > > > > > > > An explicit disable msr would also address this, right? > > > > > > > > > > > > > We do not have shortage of memory. > > > > > > Better make all MSRs works the same > > > > > > way. > > > > > > > > > > I agree it's nice to have EOI and ASYNC_PF look similar > > > > > but wasting memory is also bad. I'll ponder this some more. > > > > > > > > > Steal time and kvm clock too and may be others (if anything left at > > > > all). I hope you are kidding about wasting of 4 bytes per vcpu. > > > > > > Not vcpu - cpu. It's wasted whenever kernel/kvm.c is built so it has > > > cost on physical machines as well. > > > > > There are less real cpus than vcpus usually :) > > I'm adding this percpu always. This makes it cheap to > access but it means it is allocated on physical cpus - > just unused there. > I got it. So suppose you have 1024 cpus, so if you'll use dword instead of byte you will spend additional 3072 bytes (which you likely spend anyway due to alignment that will be done to your u8). How much memory do you expect to have with your 1024 cpus to care about 3072 bytes? Are we seriously discussing it? > > > > > > BTW have you added new MSR to msrs_to_save array? I forgot to > > > > > > checked. > > > > > > > > > > I didn't yet. Trying to understand how will that affect > > > > > cross-version migration - any input? > > > > > > > > > Not sure. You need to check what userspace does with them. > > > > > > > > > > > > > +static void apic_update_isr(struct kvm_lapic *apic) > > > > > > > > > +{ > > > > > > > > > + int vector; > > > > > > > > > + if (!eoi_enabled(apic->vcpu) || > > > > > > > > > + !apic->vcpu->arch.eoi.pending || > > > > > > > > > + eoi_get_pending(apic->vcpu)) > > > > > > > > > + return; > > > > > > > > > + apic->vcpu->arch.eoi.pending = false; > > > > > > > > > + vector = apic_find_highest_isr(apic); > > > > > > > > > + if (vector == -1) > > > > > > > > > + return; > > > > > > > > > + apic_clear_vector(vector, apic->regs + APIC_ISR); > > > > > > > > > +} > > > > > > > > > + > > > > > > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending(). > > > > > > > > This removes the need for the function and its calls. > > > > > > > > > > > > > > It's a bit of a waste: that one does all kind extra things > > > > > > > which we know we don't need, some of the atomics. And it's datapath > > > > > > > so extra stuff is not free. > > > > > > > > > > > > > How much time those extra things are taking compared to vmexit you > > > > > > already serving? And there is a good chance you will do them during > > > > > > vmentry anyway while trying to inject (or just check for) new interrupt. > > > > > > > > > > No need to do them twice :) > > > > > > > > > > > > Probably a good idea to replace the call on MSR disable - I think > > > > > > > apic_update_ppr is a better thing to call there. > > > > > > > > > > > > > > Is there anything else that I missed? > > > > > > I think that simple things are better then complex things if the end result is > > > > > > the same :) Try it and see how much simpler it is. > > > > > > > > > > It doesn't seem to be simpler at all. The common functionality is > > > > > about 4 lines. > > > > Send patch for us to see. > > > > > > That's what you are replying to, no? > > > You can see that it is 4 lines of code. > > No. I mean something like patch below. Applies on top of yours. Did not > > check that it works or even compiles. > > > > > > > > > lapic changes should be minimal. > > > > > > Exactly my motivation. > > > > > My patch removes 13 lines more :) > > I'll take a look, thanks. > > > > > > > > > > > > Have you measured > > > > > > that what you are trying to optimize actually worth optimizing? That you > > > > > > can measure the optimization at all? > > > > > > > > > > The claim is not that it's measureable. The claim is that > > > > > it does not scale to keep adding things to do on each entry. > > > > > > > > > Only if there is something to do. "Premature optimization is the root of > > > > all evil". The PV eoi is about not exiting on eoi unnecessary. You are > > > > mixing this with trying to avoid calling eoi code for given interrupt at > > > > all. > > > > > > I don't think this is what my patch does. EOI still clears ISR > > > for each interrupt. > > > > > > > Two different optimization, do not try lump them together. > > > > > > > > > > > > > > > We already have > > > > > > > > call to kvm_lapic_sync_from_vapic() on exit path which should be > > > > > > > > extended to do the above. > > > > > > > > > > > > > > It already does this. It calls apic_set_tpr > > > > > > > which calls apic_update_ppr which calls > > > > > > > apic_update_isr. > > > > > > > > > > > > > It does it only if vapic is in use (and it is usually not). > > > > > > > > > > When it's not we don't need to update ppr and so > > > > > no need to update isr on this exit. > > > > If there was eoi we need to update both. > > > > > > By same logic we should call update_ppr on each entry. > > > The overhead is unlikely to be measureable either :). > > > > > It is small enough for us to not care about it on RHEL6 where it is > > called on each entry. > > Exactly. So why do not we do it? > Calling it on each entry is a little bit excessive, calling it on each interrupt it OK. But even when it is called on each entry it does not create performance problems. > > > > > > > > > > > But the if() > > > > > > is already there so we do not need to worry that one additional if() on > > > > > > the exit path will slow KVM to the crawl. > > > > > > > > > > The number of things we need to do on each entry keeps going up, if we > > > > > just keep adding stuff it won't end well. > > > > > > > > > You do not add stuff. The if() is already there. > > > > > > > > > Your proposal was to check userspace eoi record > > > each time when eoi is pending, no? > > Yes. > > > > > This would certainly add some overhead. > > > > > Only when eoi is pending. This is rare. > > This is exactly while guest handles an interrupt. > It's not all that rare at all: e.g. device > drivers cause an exit from interrupt > handler by doing io. So eoi will be coalesced with io that device driver does. Exactly what we want. But in great scheme of things interrupts are rare :) The optimizations is disabled when interrupts are coming faster that they are served anyway. > > > > I also find the logic easier to follow as is - > > > it is contained in lapic.c without relying > > > on being called from x86.c as just the right moment. > > > > > See the patch. It change nothing outside of lapic.c. > > I'll take a look, thanks. -- Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 17:51 ` Gleb Natapov @ 2012-04-16 19:01 ` Michael S. Tsirkin 2012-04-17 8:45 ` Gleb Natapov 0 siblings, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-16 19:01 UTC (permalink / raw) To: Gleb Natapov; +Cc: kvm On Mon, Apr 16, 2012 at 08:51:16PM +0300, Gleb Natapov wrote: > > > Only when eoi is pending. This is rare. > > > > This is exactly while guest handles an interrupt. > > It's not all that rare at all: e.g. device > > drivers cause an exit from interrupt > > handler by doing io. > So eoi will be coalesced with io that device driver does. Exactly what > we want. It won't. While we handle interrupts eoi is still set. So there will be a couple of tests + read from userspace Wasted not a huge overhead but it's the principle of the thing. -- MST ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 19:01 ` Michael S. Tsirkin @ 2012-04-17 8:45 ` Gleb Natapov 0 siblings, 0 replies; 32+ messages in thread From: Gleb Natapov @ 2012-04-17 8:45 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On Mon, Apr 16, 2012 at 10:01:29PM +0300, Michael S. Tsirkin wrote: > On Mon, Apr 16, 2012 at 08:51:16PM +0300, Gleb Natapov wrote: > > > > Only when eoi is pending. This is rare. > > > > > > This is exactly while guest handles an interrupt. > > > It's not all that rare at all: e.g. device > > > drivers cause an exit from interrupt > > > handler by doing io. > > So eoi will be coalesced with io that device driver does. Exactly what > > we want. > > It won't. While we handle interrupts eoi is still set. > So there will be a couple of tests + read from userspace > Wasted not a huge overhead but it's the principle of the thing. > Linux acks irq before calling device handler. Windows for some devices does IO to a device before acking. -- Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 15:10 ` Gleb Natapov 2012-04-16 16:33 ` Michael S. Tsirkin @ 2012-04-16 17:24 ` Michael S. Tsirkin 2012-04-16 17:37 ` Gleb Natapov 1 sibling, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-16 17:24 UTC (permalink / raw) To: Gleb Natapov; +Cc: kvm On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote: > > > lapic changes should be minimal. > > > > Exactly my motivation. > > > My patch removes 13 lines more :) Haven't checked whether your patch is correct yet but I see it's checking the eoi register on each exit. I think it's clear this would make code a bit shorter (not necessarily clearer as we are relying on ) but as I said even though the extra overhead is likely negligeable I have a feeling it's a wrong approach since this won't scale as we add more features. Let's see what do others think. > > I also find the logic easier to follow as is - > > it is contained in lapic.c without relying > > on being called from x86.c as just the right moment. > > > See the patch. It change nothing outside of lapic.c. True but you rely on x86.c to call kvm_sync_lapic_from_vapic at the right time before injecting interrupts. I haven't checked whether that is always the case but to me, this makes the code less clear and more fragile. Again, it appears to be a matter of taste. -- MST ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 17:24 ` Michael S. Tsirkin @ 2012-04-16 17:37 ` Gleb Natapov 2012-04-16 18:56 ` Michael S. Tsirkin 0 siblings, 1 reply; 32+ messages in thread From: Gleb Natapov @ 2012-04-16 17:37 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On Mon, Apr 16, 2012 at 08:24:16PM +0300, Michael S. Tsirkin wrote: > On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote: > > > > lapic changes should be minimal. > > > > > > Exactly my motivation. > > > > > My patch removes 13 lines more :) > > Haven't checked whether your patch is correct yet > but I see it's checking the eoi register on each exit. > Only if eoi.pending == true on exit. > I think it's clear this would make code a bit shorter > (not necessarily clearer as we are relying on ) but > as I said even though the extra overhead is likely > negligeable I have a feeling it's a wrong approach since > this won't scale as we add more features. > > Let's see what do others think. > What I do not like about not calling eoi here is that we have to reason about all the paths into apic and check that we clear isr on all of them. And that is not all. eoi handler set event request, is it OK to skip it? May be, or may be not. > > > I also find the logic easier to follow as is - > > > it is contained in lapic.c without relying > > > on being called from x86.c as just the right moment. > > > > > See the patch. It change nothing outside of lapic.c. > > True but you rely on x86.c to call kvm_sync_lapic_from_vapic > at the right time before injecting interrupts. Not before injecting interrupts, but on vmexit. > I haven't checked whether that is always the case but > to me, this makes the code less clear and more fragile. > We call a lot of apic functions from x86.c. That's were interrupt injection happens. > Again, it appears to be a matter of taste. > > -- > MST -- Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 17:37 ` Gleb Natapov @ 2012-04-16 18:56 ` Michael S. Tsirkin 2012-04-17 8:59 ` Gleb Natapov 0 siblings, 1 reply; 32+ messages in thread From: Michael S. Tsirkin @ 2012-04-16 18:56 UTC (permalink / raw) To: Gleb Natapov; +Cc: kvm On Mon, Apr 16, 2012 at 08:37:37PM +0300, Gleb Natapov wrote: > On Mon, Apr 16, 2012 at 08:24:16PM +0300, Michael S. Tsirkin wrote: > > On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote: > > > > > lapic changes should be minimal. > > > > > > > > Exactly my motivation. > > > > > > > My patch removes 13 lines more :) > > > > Haven't checked whether your patch is correct yet > > but I see it's checking the eoi register on each exit. > > > Only if eoi.pending == true on exit. it's checking eoi.pending always and eoi register when that is true. > > I think it's clear this would make code a bit shorter > > (not necessarily clearer as we are relying on ) but > > as I said even though the extra overhead is likely > > negligeable I have a feeling it's a wrong approach since > > this won't scale as we add more features. > > > > Let's see what do others think. > > > What I do not like about not calling eoi here is that we > have to reason about all the paths into apic and check that > we clear isr on all of them. Not at all. Instead, check each time before we read isr or ppr. > And that is not all. eoi handler > set event request, is it OK to skip it? May be, or may be not. I think it's for reinjection of lower prio interrupt. Since eoi optimization is disabled in that case we don't need to set event request. > > > > I also find the logic easier to follow as is - > > > > it is contained in lapic.c without relying > > > > on being called from x86.c as just the right moment. > > > > > > > See the patch. It change nothing outside of lapic.c. > > > > True but you rely on x86.c to call kvm_sync_lapic_from_vapic > > at the right time before injecting interrupts. > Not before injecting interrupts, but on vmexit. sync is called on entry, not on exit, no? > > I haven't checked whether that is always the case but > > to me, this makes the code less clear and more fragile. > > > We call a lot of apic functions from x86.c. That's were interrupt > injection happens. > > > Again, it appears to be a matter of taste. > > > > -- > > MST > > -- > Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 18:56 ` Michael S. Tsirkin @ 2012-04-17 8:59 ` Gleb Natapov 0 siblings, 0 replies; 32+ messages in thread From: Gleb Natapov @ 2012-04-17 8:59 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: kvm On Mon, Apr 16, 2012 at 09:56:02PM +0300, Michael S. Tsirkin wrote: > On Mon, Apr 16, 2012 at 08:37:37PM +0300, Gleb Natapov wrote: > > On Mon, Apr 16, 2012 at 08:24:16PM +0300, Michael S. Tsirkin wrote: > > > On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote: > > > > > > lapic changes should be minimal. > > > > > > > > > > Exactly my motivation. > > > > > > > > > My patch removes 13 lines more :) > > > > > > Haven't checked whether your patch is correct yet > > > but I see it's checking the eoi register on each exit. > > > > > Only if eoi.pending == true on exit. > > it's checking eoi.pending always and eoi register when > that is true. > We already have if there, so we can reuse it. Instead of checking vapic_addr in kvm_lapic_sync_from_vapic() let it check apic_attention bitmask. vapic_addr and eoi.pending will set bit there and if() will become if(!apic_attention) return. Zero overhead. > > > I think it's clear this would make code a bit shorter > > > (not necessarily clearer as we are relying on ) but > > > as I said even though the extra overhead is likely > > > negligeable I have a feeling it's a wrong approach since > > > this won't scale as we add more features. > > > > > > Let's see what do others think. > > > > > What I do not like about not calling eoi here is that we > > have to reason about all the paths into apic and check that > > we clear isr on all of them. > > Not at all. Instead, check each time before we read isr > or ppr. Or inject interrupt, or ... Luckily lapic code calls apic_update_ppr() a lot (just in case), so adding check there likely covers any relevant entry into the apic, but this is exactly reasoning I would like to avoid if possible :) I do not see bug per se with your current scheme otherwise I would point it out. > > > And that is not all. eoi handler > > set event request, is it OK to skip it? May be, or may be not. > > I think it's for reinjection of lower prio interrupt. Me too. But skipping setting that bit should not be taken lightly. We need to know for sure. Current code tries to err on safe side and prefers to set event request when not needed instead of missing it when it is needed. Those rare cases are hard to debug. > Since eoi optimization is disabled in that case we don't need to set > event request. > > > > > > I also find the logic easier to follow as is - > > > > > it is contained in lapic.c without relying > > > > > on being called from x86.c as just the right moment. > > > > > > > > > See the patch. It change nothing outside of lapic.c. > > > > > > True but you rely on x86.c to call kvm_sync_lapic_from_vapic > > > at the right time before injecting interrupts. > > Not before injecting interrupts, but on vmexit. > > sync is called on entry, not on exit, no? No. Look at the code. Entry is to late. > > > > > I haven't checked whether that is always the case but > > > to me, this makes the code less clear and more fragile. > > > > > We call a lot of apic functions from x86.c. That's were interrupt > > injection happens. > > > > > Again, it appears to be a matter of taste. > > > > > > -- > > > MST > > > > -- > > Gleb. -- Gleb. ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 12:18 ` Michael S. Tsirkin 2012-04-16 12:30 ` Gleb Natapov @ 2012-04-17 9:24 ` Avi Kivity 1 sibling, 0 replies; 32+ messages in thread From: Avi Kivity @ 2012-04-17 9:24 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Gleb Natapov, kvm On 04/16/2012 03:18 PM, Michael S. Tsirkin wrote: > On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote: > > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote: > > > Thanks very much for the review. I'll address the comments. > > > Some questions on your comments below. > > > > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote: > > > > > @@ -37,6 +38,8 @@ > > > > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 > > > > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 > > > > > #define MSR_KVM_STEAL_TIME 0x4b564d03 > > > > > +#define MSR_KVM_EOI_EN 0x4b564d04 > > > > > +#define MSR_KVM_EOI_DISABLED 0x0L > > > > This is valid gpa. Follow others MSR example i.e align the address to, > > > > lets say dword, and use lsb as enable bit. > > > > > > We only need a single byte, since this is per-CPU - > > > it's better to save the memory, so no alignment is required. > > > An explicit disable msr would also address this, right? > > > > > We do not have shortage of memory. > > Better make all MSRs works the same > > way. > > I agree it's nice to have EOI and ASYNC_PF look similar > but wasting memory is also bad. I'll ponder this some more. Wasting three bytes? > > BTW have you added new MSR to msrs_to_save array? I forgot to > > checked. > > I didn't yet. Trying to understand how will that affect > cross-version migration - any input? It will just work. -- error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 32+ messages in thread
* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory 2012-04-16 10:08 ` Gleb Natapov 2012-04-16 11:09 ` Michael S. Tsirkin @ 2012-04-17 9:22 ` Avi Kivity 1 sibling, 0 replies; 32+ messages in thread From: Avi Kivity @ 2012-04-17 9:22 UTC (permalink / raw) To: Gleb Natapov; +Cc: Michael S. Tsirkin, kvm On 04/16/2012 01:08 PM, Gleb Natapov wrote: > > } > > > > + > > + if (kvm_para_has_feature(KVM_FEATURE_EOI)) { > > + kvm_guest_native_apic_write = apic->write; > > + apic->write = kvm_guest_apic_write; > > + wrmsrl(MSR_KVM_EOI_EN, __pa(&__get_cpu_var(apic_eoi))); > > + } > Can happen on more then one cpu. After it happens on two kvm_guest_apic_write() > will call to itself. It's also hacky. Try static_cpu_has() in the apic code (and add an eoi method). -- error compiling committee.c: too many arguments to function ^ permalink raw reply [flat|nested] 32+ messages in thread
end of thread, other threads:[~2012-04-17 9:24 UTC | newest] Thread overview: 32+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-04-10 13:27 [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory Michael S. Tsirkin 2012-04-10 14:03 ` Avi Kivity 2012-04-10 14:26 ` Michael S. Tsirkin 2012-04-10 14:33 ` Avi Kivity 2012-04-10 14:53 ` Michael S. Tsirkin 2012-04-10 15:00 ` Avi Kivity 2012-04-10 15:14 ` Michael S. Tsirkin 2012-04-10 16:08 ` Avi Kivity 2012-04-10 17:06 ` Michael S. Tsirkin 2012-04-10 17:59 ` Gleb Natapov 2012-04-10 19:30 ` Michael S. Tsirkin 2012-04-10 19:33 ` Gleb Natapov 2012-04-10 19:40 ` Michael S. Tsirkin 2012-04-10 19:42 ` Gleb Natapov 2012-04-15 16:18 ` [PATCHv1 " Michael S. Tsirkin 2012-04-16 10:08 ` Gleb Natapov 2012-04-16 11:09 ` Michael S. Tsirkin 2012-04-16 11:24 ` Gleb Natapov 2012-04-16 12:18 ` Michael S. Tsirkin 2012-04-16 12:30 ` Gleb Natapov 2012-04-16 13:13 ` Michael S. Tsirkin 2012-04-16 15:10 ` Gleb Natapov 2012-04-16 16:33 ` Michael S. Tsirkin 2012-04-16 17:51 ` Gleb Natapov 2012-04-16 19:01 ` Michael S. Tsirkin 2012-04-17 8:45 ` Gleb Natapov 2012-04-16 17:24 ` Michael S. Tsirkin 2012-04-16 17:37 ` Gleb Natapov 2012-04-16 18:56 ` Michael S. Tsirkin 2012-04-17 8:59 ` Gleb Natapov 2012-04-17 9:24 ` Avi Kivity 2012-04-17 9:22 ` Avi Kivity
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox