[PATCHv0 dont apply] RFC: kvm eoi PV using shared memory

public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
@ 2012-04-10 13:27 Michael S. Tsirkin
  2012-04-10 14:03 ` Avi Kivity
  2012-04-15 16:18 ` [PATCHv1 " Michael S. Tsirkin
  0 siblings, 2 replies; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-10 13:27 UTC (permalink / raw)
  To: kvm

I took a stub at implementing PV EOI using shared memory.
This should reduce the number of exits an interrupt
causes as much as by half.

A partially complete draft for both host and guest parts
is below.

The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
We set it before injecting an interrupt and clear
before injecting a nested one. Guest tests it using
a test and clear operation - this is necessary
so that host can detect interrupt nesting -
and if set, it can skip the EOI MSR.

There's a new MSR to set the address of said register
in guest memory. Otherwise not much changes:
- Guest EOI is not required
- ISR is automatically cleared before injection

Some things are incomplete: add feature negotiation
options, qemu support for said options.
No testing was done beyond compiling the kernel.

I would appreciate early feedback.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

--

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d854101..8430f41 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
+
 static inline void ack_APIC_irq(void)
 {
+	if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi)))
+		return;
+
 	/*
 	 * ack_APIC_irq() actually gets compiled as a single instruction
 	 * ... yummie.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e216ba0..0ee1472 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
 		u64 length;
 		u64 status;
 	} osvw;
+
+	struct {
+		u64 msr_val;
+		struct gfn_to_hva_cache data;
+		int vector;
+	} eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 734c376..e22b9f8 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -37,6 +37,8 @@
 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 #define MSR_KVM_STEAL_TIME  0x4b564d03
+#define MSR_KVM_EOI_EN      0x4b564d04
+#define MSR_KVM_EOI_ENABLED 0x1
 
 struct kvm_steal_time {
 	__u64 steal;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 11544d8..1b3f9fa 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -89,6 +89,9 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
  */
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
 
+DEFINE_EARLY_PER_CPU(unsigned long, apic_eoi, 0);
+EXPORT_EARLY_PER_CPU_SYMBOL(apic_eoi);
+
 /*
  * Knob to control our willingness to enable the local APIC.
  *
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b8ba6e4..8b50f3a 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -39,6 +39,7 @@
 #include <asm/desc.h>
 #include <asm/tlbflush.h>
 #include <asm/idle.h>
+#include <asm/apic.h>
 
 static int kvmapf = 1;
 
@@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
 		       smp_processor_id());
 	}
 
+	wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
+	       MSR_KVM_EOI_ENABLED);
+
 	if (has_steal_clock)
 		kvm_register_steal_time();
 }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 8584322..9e38e12 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 			irq->level, irq->trig_mode);
 }
 
-static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
+{
+
+	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val,
+				      sizeof(val));
+}
+
+static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
+{
+
+	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val,
+				      sizeof(*val));
+}
+
+static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED);
+}
+
+static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
+{
+	u32 val;
+	if (eoi_get_user(vcpu, &val) < 0)
+		apic_debug("Can't read EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpi->arch.eoi.msr_val);
+	if (!(val & 0x1))
+		vcpu->arch.eoi.vector = -1;
+	return vcpu->arch.eoi.vector;
+}
+
+static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
+{
+	BUG_ON(vcpu->arch.eoi.vector != -1);
+	if (eoi_put_user(vcpu, 0x1) < 0) {
+		apic_debug("Can't set EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpi->arch.eoi.msr_val);
+		return;
+	}
+	vcpu->arch.eoi.vector = vector;
+}
+
+static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
+{
+	int vector;
+	vector = vcpu->arch.eoi.vector;
+	if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) {
+		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpi->arch.eoi.msr_val);
+		return -1;
+	}
+	vcpu->arch.eoi.vector = -1;
+	return vector;
+}
+
+static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
 {
 	int result;
 
@@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 	return result;
 }
 
+static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+{
+	int vector;
+	if (eoi_enabled(apic->vcpu)) {
+		vector = eoi_get_pending_vector(apic->vcpu);
+		if (vector != -1)
+			return vector;
+	}
+	return __apic_find_highest_isr(apic);
+}
+
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
 	u32 tpr, isrv, ppr, old_ppr;
@@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
 	if (vector == -1)
 		return;
 
+	if (eoi_enabled(apic->vcpu))
+		eoi_clr_pending_vector(apic->vcpu);
 	apic_clear_vector(vector, apic->regs + APIC_ISR);
 	apic_update_ppr(apic);
 
@@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
 	int vector = kvm_apic_has_interrupt(vcpu);
 	struct kvm_lapic *apic = vcpu->arch.apic;
+	bool set_isr = true;
 
 	if (vector == -1)
 		return -1;
 
-	apic_set_vector(vector, apic->regs + APIC_ISR);
+	if (eoi_enabled(vcpu)) {
+		/* Anything pending? If yes disable eoi optimization. */
+		if (unlikely(apic_find_highest_isr(apic) >= 0)) {
+			int v = eoi_clr_pending_vector(vcpu);
+			if (v != -1)
+				apic_set_vector(v, apic->regs + APIC_ISR);
+		} else {
+			eoi_set_pending_vector(vcpu, vector);
+			set_isr = false;
+		}
+	}
+
+	if (set_isr)
+		apic_set_vector(vector, apic->regs + APIC_ISR);
 	apic_update_ppr(apic);
 	apic_clear_irr(vector, apic);
 	return vector;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4044ce0..4d00a4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1502,6 +1502,27 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 	return 0;
 }
 
+static int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data)
+{
+	gpa_t gpa = data & ~MSR_KVM_EOI_ENABLED;
+
+	/* Bit 1 is reserved, Should be zero. */
+	if (data & 0x2)
+		return 1;
+
+	vcpu->arch.eoi.msr_val = data;
+	vcpu->arch.eoi.vector = -1;
+
+	if (!(data & MSR_KVM_EOI_ENABLED)) {
+		return 0;
+	}
+
+	if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data, gpa))
+		return 1;
+
+	return 0;
+}
+
 static void kvmclock_reset(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.time_page) {
@@ -1627,6 +1648,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		if (kvm_pv_enable_async_pf(vcpu, data))
 			return 1;
 		break;
+	case MSR_KVM_EOI_EN:
+		if (kvm_pv_enable_apic_eoi(vcpu, data))
+			return 1;
+		break;
 	case MSR_KVM_STEAL_TIME:
 
 		if (unlikely(!sched_info_on()))
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 638a4f3..05c1bf9 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -372,7 +372,6 @@ static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
 {
 	const int base = 'z' - 'a' + 1;
 	char *begin = buf + strlen(prefix);
-	char *begin = buf + strlen(prefix);
 	char *end = buf + buflen;
 	char *p;
 	int unit;
diff --git a/net/core/dev.c b/net/core/dev.c
index 9d713b8..e42529b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2455,6 +2455,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
+		/* Orphan the skb - required if we might hang on to it
+		 * for indefinite time. */
+		if (unlikely(dev->priv_flags & IFF_TX_CAN_STALL))
+			skb_orphan(skb);
+
 		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
 		if (qdisc_run_begin(q)) {
 			if (unlikely(contended)) {
@@ -2517,11 +2522,6 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
-	/* Orphan the skb - required if we might hang on to it
-	 * for indefinite time. */
-	if (dev->priv_flags & IFF_TX_CAN_STALL)
-		skb_orphan(skb);
-
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 27883d1..644ca53 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -120,14 +120,11 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	/* Orphan the skb - required if we might hang on to it
-	 * for indefinite time. */
-	if (dev->priv_flags & IFF_TX_CAN_STALL)
-		skb_orphan(skb);
-
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_xmit_frozen_or_stopped(txq))
+	if (likely(!netif_xmit_frozen_or_stopped(txq)))
 		ret = dev_hard_start_xmit(skb, dev, txq);
+	else if (dev->priv_flags & IFF_TX_CAN_STALL)
+		skb_orphan(skb);
 
 	HARD_TX_UNLOCK(dev, txq);
 
@@ -695,7 +692,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
 {
 	struct Qdisc *qdisc = &noqueue_qdisc;
 
-	if (dev->tx_queue_len) {
+	if (dev->tx_queue_len && !(dev->priv_flags & IFF_TX_CAN_STALL)) {
 		qdisc = qdisc_create_dflt(dev_queue,
 					  &pfifo_fast_ops, TC_H_ROOT);
 		if (!qdisc) {

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 13:27 [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory Michael S. Tsirkin
@ 2012-04-10 14:03 ` Avi Kivity
  2012-04-10 14:26   ` Michael S. Tsirkin
  2012-04-15 16:18 ` [PATCHv1 " Michael S. Tsirkin
  1 sibling, 1 reply; 32+ messages in thread
From: Avi Kivity @ 2012-04-10 14:03 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
> I took a stub at implementing PV EOI using shared memory.
> This should reduce the number of exits an interrupt
> causes as much as by half.
>
> A partially complete draft for both host and guest parts
> is below.
>
> The idea is simple: there's a bit, per APIC, in guest memory,
> that tells the guest that it does not need EOI.
> We set it before injecting an interrupt and clear
> before injecting a nested one. Guest tests it using
> a test and clear operation - this is necessary
> so that host can detect interrupt nesting -
> and if set, it can skip the EOI MSR.
>
> There's a new MSR to set the address of said register
> in guest memory. Otherwise not much changes:
> - Guest EOI is not required
> - ISR is automatically cleared before injection
>
> Some things are incomplete: add feature negotiation
> options, qemu support for said options.
> No testing was done beyond compiling the kernel.
>
> I would appreciate early feedback.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>
> --
>
> diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
> index d854101..8430f41 100644
> --- a/arch/x86/include/asm/apic.h
> +++ b/arch/x86/include/asm/apic.h
> @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
>  
>  #endif /* CONFIG_X86_LOCAL_APIC */
>  
> +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
> +
>  static inline void ack_APIC_irq(void)
>  {
> +	if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi)))
> +		return;
> +

While __test_and_clear_bit() is implemented in a single instruction,
it's not required to be.  Better have the instruction there explicitly.

>  	/*
>  	 * ack_APIC_irq() actually gets compiled as a single instruction
>  	 * ... yummie.
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index e216ba0..0ee1472 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
>  		u64 length;
>  		u64 status;
>  	} osvw;
> +
> +	struct {
> +		u64 msr_val;
> +		struct gfn_to_hva_cache data;
> +		int vector;
> +	} eoi;
>  };

Needs to be cleared on INIT.

>  
>
> @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
>  		       smp_processor_id());
>  	}
>  
> +	wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
> +	       MSR_KVM_EOI_ENABLED);
> +

Clear on kexec.

>  	if (has_steal_clock)
>  		kvm_register_steal_time();
>  }
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 8584322..9e38e12 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
>  			irq->level, irq->trig_mode);
>  }
>  
> -static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
> +{
> +
> +	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val,
> +				      sizeof(val));
> +}
> +
> +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
> +{
> +
> +	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val,
> +				      sizeof(*val));
> +}
> +
> +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
> +{
> +	return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED);
> +}
> +
> +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
> +{
> +	u32 val;
> +	if (eoi_get_user(vcpu, &val) < 0)
> +		apic_debug("Can't read EOI MSR value: 0x%llx\n",
> +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> +	if (!(val & 0x1))
> +		vcpu->arch.eoi.vector = -1;
> +	return vcpu->arch.eoi.vector;
> +}
> +
> +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
> +{
> +	BUG_ON(vcpu->arch.eoi.vector != -1);
> +	if (eoi_put_user(vcpu, 0x1) < 0) {
> +		apic_debug("Can't set EOI MSR value: 0x%llx\n",
> +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> +		return;
> +	}
> +	vcpu->arch.eoi.vector = vector;
> +}
> +
> +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
> +{
> +	int vector;
> +	vector = vcpu->arch.eoi.vector;
> +	if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) {
> +		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
> +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> +		return -1;
> +	}
> +	vcpu->arch.eoi.vector = -1;
> +	return vector;
> +}



> +
> +static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
>  {
>  	int result;
>  
> @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
>  	return result;
>  }
>  
> +static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> +{
> +	int vector;
> +	if (eoi_enabled(apic->vcpu)) {
> +		vector = eoi_get_pending_vector(apic->vcpu);
> +		if (vector != -1)
> +			return vector;
> +	}
> +	return __apic_find_highest_isr(apic);
> +}

Why aren't you modifying the ISR unconfitionally?

> +
>  static void apic_update_ppr(struct kvm_lapic *apic)
>  {
>  	u32 tpr, isrv, ppr, old_ppr;
> @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
>  	if (vector == -1)
>  		return;
>  
> +	if (eoi_enabled(apic->vcpu))
> +		eoi_clr_pending_vector(apic->vcpu);
>  	apic_clear_vector(vector, apic->regs + APIC_ISR);
>  	apic_update_ppr(apic);
>  
> @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
>  {
>  	int vector = kvm_apic_has_interrupt(vcpu);
>  	struct kvm_lapic *apic = vcpu->arch.apic;
> +	bool set_isr = true;
>  
>  	if (vector == -1)
>  		return -1;
>  
> -	apic_set_vector(vector, apic->regs + APIC_ISR);
> +	if (eoi_enabled(vcpu)) {
> +		/* Anything pending? If yes disable eoi optimization. */
> +		if (unlikely(apic_find_highest_isr(apic) >= 0)) {
> +			int v = eoi_clr_pending_vector(vcpu);

ISR != pending, that's IRR.  If ISR vector has lower priority than the
new vector, then we don't need to disable eoi avoidance.

> +			if (v != -1)
> +				apic_set_vector(v, apic->regs + APIC_ISR);
> +		} else {
> +			eoi_set_pending_vector(vcpu, vector);
> +			set_isr = false;

Weird.  Just set it normally.  Remember that reading the ISR needs to
return the correct value.

We need to process the avoided EOI before any APIC read/writes, to be
sure the guest sees the updated values.  Same for IOAPIC, EOI affects
remote_irr.  That may been we need to sample it after every exit, or
perhaps disable the feature for level-triggered interrupts.


> +		}
> +	}
> +
> +	if (set_isr)
> +		apic_set_vector(vector, apic->regs + APIC_ISR);
>  	apic_update_ppr(apic);
>  	apic_clear_irr(vector, apic);
>  	return vector;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 14:03 ` Avi Kivity
@ 2012-04-10 14:26   ` Michael S. Tsirkin
  2012-04-10 14:33     ` Avi Kivity
  2012-04-10 17:59     ` Gleb Natapov
  0 siblings, 2 replies; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-10 14:26 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote:
> On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
> > I took a stub at implementing PV EOI using shared memory.
> > This should reduce the number of exits an interrupt
> > causes as much as by half.
> >
> > A partially complete draft for both host and guest parts
> > is below.
> >
> > The idea is simple: there's a bit, per APIC, in guest memory,
> > that tells the guest that it does not need EOI.
> > We set it before injecting an interrupt and clear
> > before injecting a nested one. Guest tests it using
> > a test and clear operation - this is necessary
> > so that host can detect interrupt nesting -
> > and if set, it can skip the EOI MSR.
> >
> > There's a new MSR to set the address of said register
> > in guest memory. Otherwise not much changes:
> > - Guest EOI is not required
> > - ISR is automatically cleared before injection
> >
> > Some things are incomplete: add feature negotiation
> > options, qemu support for said options.
> > No testing was done beyond compiling the kernel.
> >
> > I would appreciate early feedback.
> >
> > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> >
> > --
> >
> > diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
> > index d854101..8430f41 100644
> > --- a/arch/x86/include/asm/apic.h
> > +++ b/arch/x86/include/asm/apic.h
> > @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
> >  
> >  #endif /* CONFIG_X86_LOCAL_APIC */
> >  
> > +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
> > +
> >  static inline void ack_APIC_irq(void)
> >  {
> > +	if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi)))
> > +		return;
> > +
> 
> While __test_and_clear_bit() is implemented in a single instruction,
> it's not required to be.  Better have the instruction there explicitly.
> 
> >  	/*
> >  	 * ack_APIC_irq() actually gets compiled as a single instruction
> >  	 * ... yummie.
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index e216ba0..0ee1472 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
> >  		u64 length;
> >  		u64 status;
> >  	} osvw;
> > +
> > +	struct {
> > +		u64 msr_val;
> > +		struct gfn_to_hva_cache data;
> > +		int vector;
> > +	} eoi;
> >  };
> 
> Needs to be cleared on INIT.

You mean kvm_arch_vcpu_reset?

> >  
> >
> > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
> >  		       smp_processor_id());
> >  	}
> >  
> > +	wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
> > +	       MSR_KVM_EOI_ENABLED);
> > +
> 
> Clear on kexec.

With register_reboot_notifier?

> >  	if (has_steal_clock)
> >  		kvm_register_steal_time();
> >  }
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 8584322..9e38e12 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
> >  			irq->level, irq->trig_mode);
> >  }
> >  
> > -static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
> > +{
> > +
> > +	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val,
> > +				      sizeof(val));
> > +}
> > +
> > +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
> > +{
> > +
> > +	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val,
> > +				      sizeof(*val));
> > +}
> > +
> > +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
> > +{
> > +	return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED);
> > +}
> > +
> > +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
> > +{
> > +	u32 val;
> > +	if (eoi_get_user(vcpu, &val) < 0)
> > +		apic_debug("Can't read EOI MSR value: 0x%llx\n",
> > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > +	if (!(val & 0x1))
> > +		vcpu->arch.eoi.vector = -1;
> > +	return vcpu->arch.eoi.vector;
> > +}
> > +
> > +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
> > +{
> > +	BUG_ON(vcpu->arch.eoi.vector != -1);
> > +	if (eoi_put_user(vcpu, 0x1) < 0) {
> > +		apic_debug("Can't set EOI MSR value: 0x%llx\n",
> > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > +		return;
> > +	}
> > +	vcpu->arch.eoi.vector = vector;
> > +}
> > +
> > +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
> > +{
> > +	int vector;
> > +	vector = vcpu->arch.eoi.vector;
> > +	if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) {
> > +		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
> > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > +		return -1;
> > +	}
> > +	vcpu->arch.eoi.vector = -1;
> > +	return vector;
> > +}
> 
> 
> 
> > +
> > +static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
> >  {
> >  	int result;
> >  
> > @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> >  	return result;
> >  }
> >  
> > +static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > +{
> > +	int vector;
> > +	if (eoi_enabled(apic->vcpu)) {
> > +		vector = eoi_get_pending_vector(apic->vcpu);
> > +		if (vector != -1)
> > +			return vector;
> > +	}
> > +	return __apic_find_highest_isr(apic);
> > +}
> 
> Why aren't you modifying the ISR unconfitionally?

ISR is not set if there won't be an EOI
since it's EOI that normally clears it.

> > +
> >  static void apic_update_ppr(struct kvm_lapic *apic)
> >  {
> >  	u32 tpr, isrv, ppr, old_ppr;
> > @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
> >  	if (vector == -1)
> >  		return;
> >  
> > +	if (eoi_enabled(apic->vcpu))
> > +		eoi_clr_pending_vector(apic->vcpu);
> >  	apic_clear_vector(vector, apic->regs + APIC_ISR);
> >  	apic_update_ppr(apic);
> >  
> > @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
> >  {
> >  	int vector = kvm_apic_has_interrupt(vcpu);
> >  	struct kvm_lapic *apic = vcpu->arch.apic;
> > +	bool set_isr = true;
> >  
> >  	if (vector == -1)
> >  		return -1;
> >  
> > -	apic_set_vector(vector, apic->regs + APIC_ISR);
> > +	if (eoi_enabled(vcpu)) {
> > +		/* Anything pending? If yes disable eoi optimization. */
> > +		if (unlikely(apic_find_highest_isr(apic) >= 0)) {
> > +			int v = eoi_clr_pending_vector(vcpu);
> 
> ISR != pending, that's IRR.  If ISR vector has lower priority than the
> new vector, then we don't need to disable eoi avoidance.

Yes. But we can and it's easier than figuring out priorities.
I am guessing such collisions are rare, right?
I'll add a trace to make sure.

> > +			if (v != -1)
> > +				apic_set_vector(v, apic->regs + APIC_ISR);
> > +		} else {
> > +			eoi_set_pending_vector(vcpu, vector);
> > +			set_isr = false;
> 
> Weird.  Just set it normally.  Remember that reading the ISR needs to
> return the correct value.

Marcelo said linux does not normally read ISR - not true?
Note this has no effect if the PV optimization is not enabled.

> We need to process the avoided EOI before any APIC read/writes, to be
> sure the guest sees the updated values.  Same for IOAPIC, EOI affects
> remote_irr.  That may been we need to sample it after every exit, or
> perhaps disable the feature for level-triggered interrupts.

Disabling would be very sad.  Can we sample on remote irr read?

> > +		}
> > +	}
> > +
> > +	if (set_isr)
> > +		apic_set_vector(vector, apic->regs + APIC_ISR);
> >  	apic_update_ppr(apic);
> >  	apic_clear_irr(vector, apic);
> >  	return vector;
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> >
> 
> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 14:26   ` Michael S. Tsirkin
@ 2012-04-10 14:33     ` Avi Kivity
  2012-04-10 14:53       ` Michael S. Tsirkin
  2012-04-10 17:59     ` Gleb Natapov
  1 sibling, 1 reply; 32+ messages in thread
From: Avi Kivity @ 2012-04-10 14:33 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On 04/10/2012 05:26 PM, Michael S. Tsirkin wrote:
> > >  		u64 status;
> > >  	} osvw;
> > > +
> > > +	struct {
> > > +		u64 msr_val;
> > > +		struct gfn_to_hva_cache data;
> > > +		int vector;
> > > +	} eoi;
> > >  };
> > 
> > Needs to be cleared on INIT.
>
> You mean kvm_arch_vcpu_reset?

Yes, or kvm_lapic_reset().

> > >  
> > >
> > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
> > >  		       smp_processor_id());
> > >  	}
> > >  
> > > +	wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
> > > +	       MSR_KVM_EOI_ENABLED);
> > > +
> > 
> > Clear on kexec.
>
> With register_reboot_notifier?

Yes, we already clear some kvm msrs there.

> > >  
> > > -	apic_set_vector(vector, apic->regs + APIC_ISR);
> > > +	if (eoi_enabled(vcpu)) {
> > > +		/* Anything pending? If yes disable eoi optimization. */
> > > +		if (unlikely(apic_find_highest_isr(apic) >= 0)) {
> > > +			int v = eoi_clr_pending_vector(vcpu);
> > 
> > ISR != pending, that's IRR.  If ISR vector has lower priority than the
> > new vector, then we don't need to disable eoi avoidance.
>
> Yes. But we can and it's easier than figuring out priorities.
> I am guessing such collisions are rare, right?

It's pretty easy, if there is something in IRR but
kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance.

> I'll add a trace to make sure.
>
> > > +			if (v != -1)
> > > +				apic_set_vector(v, apic->regs + APIC_ISR);
> > > +		} else {
> > > +			eoi_set_pending_vector(vcpu, vector);
> > > +			set_isr = false;
> > 
> > Weird.  Just set it normally.  Remember that reading the ISR needs to
> > return the correct value.
>
> Marcelo said linux does not normally read ISR - not true?

It's true and it's irrelevant.  We aren't coding a feature to what linux
does now, but for what linux or another guest may do in the future.

> Note this has no effect if the PV optimization is not enabled.
>
> > We need to process the avoided EOI before any APIC read/writes, to be
> > sure the guest sees the updated values.  Same for IOAPIC, EOI affects
> > remote_irr.  That may been we need to sample it after every exit, or
> > perhaps disable the feature for level-triggered interrupts.
>
> Disabling would be very sad.  Can we sample on remote irr read?

That can be done from another vcpu.  Why do we care about
level-triggered interrupts?  Everything uses MSI or edge-triggered
IOAPIC interrupts these days.


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 14:33     ` Avi Kivity
@ 2012-04-10 14:53       ` Michael S. Tsirkin
  2012-04-10 15:00         ` Avi Kivity
  0 siblings, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-10 14:53 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Tue, Apr 10, 2012 at 05:33:18PM +0300, Avi Kivity wrote:
> On 04/10/2012 05:26 PM, Michael S. Tsirkin wrote:
> > > >  		u64 status;
> > > >  	} osvw;
> > > > +
> > > > +	struct {
> > > > +		u64 msr_val;
> > > > +		struct gfn_to_hva_cache data;
> > > > +		int vector;
> > > > +	} eoi;
> > > >  };
> > > 
> > > Needs to be cleared on INIT.
> >
> > You mean kvm_arch_vcpu_reset?
> 
> Yes, or kvm_lapic_reset().
> 
> > > >  
> > > >
> > > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
> > > >  		       smp_processor_id());
> > > >  	}
> > > >  
> > > > +	wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
> > > > +	       MSR_KVM_EOI_ENABLED);
> > > > +
> > > 
> > > Clear on kexec.
> >
> > With register_reboot_notifier?
> 
> Yes, we already clear some kvm msrs there.
> 
> > > >  
> > > > -	apic_set_vector(vector, apic->regs + APIC_ISR);
> > > > +	if (eoi_enabled(vcpu)) {
> > > > +		/* Anything pending? If yes disable eoi optimization. */
> > > > +		if (unlikely(apic_find_highest_isr(apic) >= 0)) {
> > > > +			int v = eoi_clr_pending_vector(vcpu);
> > > 
> > > ISR != pending, that's IRR.  If ISR vector has lower priority than the
> > > new vector, then we don't need to disable eoi avoidance.
> >
> > Yes. But we can and it's easier than figuring out priorities.
> > I am guessing such collisions are rare, right?
> 
> It's pretty easy, if there is something in IRR but
> kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance.

I only see kvm_apic_has_interrupt - is this what you mean?

> > I'll add a trace to make sure.
> >
> > > > +			if (v != -1)
> > > > +				apic_set_vector(v, apic->regs + APIC_ISR);
> > > > +		} else {
> > > > +			eoi_set_pending_vector(vcpu, vector);
> > > > +			set_isr = false;
> > > 
> > > Weird.  Just set it normally.  Remember that reading the ISR needs to
> > > return the correct value.
> >
> > Marcelo said linux does not normally read ISR - not true?
> 
> It's true and it's irrelevant.  We aren't coding a feature to what linux
> does now, but for what linux or another guest may do in the future.

Right. So you think reading ISR has value
in combination with PV EOI for future guests?
I'm not arguing either way just curious.

> > Note this has no effect if the PV optimization is not enabled.
> >
> > > We need to process the avoided EOI before any APIC read/writes, to be
> > > sure the guest sees the updated values.  Same for IOAPIC, EOI affects
> > > remote_irr.  That may been we need to sample it after every exit, or
> > > perhaps disable the feature for level-triggered interrupts.
> >
> > Disabling would be very sad.  Can we sample on remote irr read?
> 
> That can be done from another vcpu.

We still can handle it, right? Where's the code that handles that read?

> Why do we care about
> level-triggered interrupts?  Everything uses MSI or edge-triggered
> IOAPIC interrupts these days.

Well lots of emulated devices don't yet.
They probably should but it's nice to be able to
test with e.g. e1000 emulation not just virtio.

Besides, kvm_get_apic_interrupt
simply doesn't know about the triggering mode at the moment.

-- 
MST

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 14:53       ` Michael S. Tsirkin
@ 2012-04-10 15:00         ` Avi Kivity
  2012-04-10 15:14           ` Michael S. Tsirkin
  0 siblings, 1 reply; 32+ messages in thread
From: Avi Kivity @ 2012-04-10 15:00 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote:
> > >
> > > Yes. But we can and it's easier than figuring out priorities.
> > > I am guessing such collisions are rare, right?
> > 
> > It's pretty easy, if there is something in IRR but
> > kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance.
>
> I only see kvm_apic_has_interrupt - is this what you mean?

Yes, sorry.

It's not clear whether to do the check in kvm_apic_has_interrupt() or
kvm_apic_get_interrupt() - the latter is called only after interrupts
are enabled, so it looks like a better place (EOIs while interrupts are
disabled have no effect).  But need to make sure those functions are
actually called, since they're protected by KVM_REQ_EVENT.

> > > I'll add a trace to make sure.
> > >
> > > > > +			if (v != -1)
> > > > > +				apic_set_vector(v, apic->regs + APIC_ISR);
> > > > > +		} else {
> > > > > +			eoi_set_pending_vector(vcpu, vector);
> > > > > +			set_isr = false;
> > > > 
> > > > Weird.  Just set it normally.  Remember that reading the ISR needs to
> > > > return the correct value.
> > >
> > > Marcelo said linux does not normally read ISR - not true?
> > 
> > It's true and it's irrelevant.  We aren't coding a feature to what linux
> > does now, but for what linux or another guest may do in the future.
>
> Right. So you think reading ISR has value
> in combination with PV EOI for future guests?
> I'm not arguing either way just curious.

I don't.  But we need to preserve the same interface the APIC has
presented for thousands of years (well, almost).

>
> > > Note this has no effect if the PV optimization is not enabled.
> > >
> > > > We need to process the avoided EOI before any APIC read/writes, to be
> > > > sure the guest sees the updated values.  Same for IOAPIC, EOI affects
> > > > remote_irr.  That may been we need to sample it after every exit, or
> > > > perhaps disable the feature for level-triggered interrupts.
> > >
> > > Disabling would be very sad.  Can we sample on remote irr read?
> > 
> > That can be done from another vcpu.
>
> We still can handle it, right? Where's the code that handles that read?

Better to keep everything per-cpu.  The code is in virt/kvm/ioapic.c

>
> > Why do we care about
> > level-triggered interrupts?  Everything uses MSI or edge-triggered
> > IOAPIC interrupts these days.
>
> Well lots of emulated devices don't yet.
> They probably should but it's nice to be able to
> test with e.g. e1000 emulation not just virtio.


e1000 doesn't support msi?

>
> Besides, kvm_get_apic_interrupt
> simply doesn't know about the triggering mode at the moment.
>


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 15:00         ` Avi Kivity
@ 2012-04-10 15:14           ` Michael S. Tsirkin
  2012-04-10 16:08             ` Avi Kivity
  0 siblings, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-10 15:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Tue, Apr 10, 2012 at 06:00:51PM +0300, Avi Kivity wrote:
> On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote:
> > > >
> > > > Yes. But we can and it's easier than figuring out priorities.
> > > > I am guessing such collisions are rare, right?
> > > 
> > > It's pretty easy, if there is something in IRR but
> > > kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance.
> >
> > I only see kvm_apic_has_interrupt - is this what you mean?
> 
> Yes, sorry.
> 
> It's not clear whether to do the check in kvm_apic_has_interrupt() or
> kvm_apic_get_interrupt() - the latter is called only after interrupts
> are enabled, so it looks like a better place (EOIs while interrupts are
> disabled have no effect).  But need to make sure those functions are
> actually called, since they're protected by KVM_REQ_EVENT.

Sorry not sure what you mean by "make sure" - read the code carefully?

> > > > I'll add a trace to make sure.
> > > >
> > > > > > +			if (v != -1)
> > > > > > +				apic_set_vector(v, apic->regs + APIC_ISR);
> > > > > > +		} else {
> > > > > > +			eoi_set_pending_vector(vcpu, vector);
> > > > > > +			set_isr = false;
> > > > > 
> > > > > Weird.  Just set it normally.  Remember that reading the ISR needs to
> > > > > return the correct value.
> > > >
> > > > Marcelo said linux does not normally read ISR - not true?
> > > 
> > > It's true and it's irrelevant.  We aren't coding a feature to what linux
> > > does now, but for what linux or another guest may do in the future.
> >
> > Right. So you think reading ISR has value
> > in combination with PV EOI for future guests?
> > I'm not arguing either way just curious.
> 
> I don't.  But we need to preserve the same interface the APIC has
> presented for thousands of years (well, almost).


Talk about overstatements :)

> >
> > > > Note this has no effect if the PV optimization is not enabled.
> > > >
> > > > > We need to process the avoided EOI before any APIC read/writes, to be
> > > > > sure the guest sees the updated values.  Same for IOAPIC, EOI affects
> > > > > remote_irr.  That may been we need to sample it after every exit, or
> > > > > perhaps disable the feature for level-triggered interrupts.
> > > >
> > > > Disabling would be very sad.  Can we sample on remote irr read?
> > > 
> > > That can be done from another vcpu.
> >
> > We still can handle it, right? Where's the code that handles that read?
> 
> Better to keep everything per-cpu.  The code is in virt/kvm/ioapic.c

Hmm. Disabling for level handles the ack notifiers
issue as well, which I forgot about.
It's a tough call. You think looking at
TMR in kvm_get_apic_interrupt is safe?

> >
> > > Why do we care about
> > > level-triggered interrupts?  Everything uses MSI or edge-triggered
> > > IOAPIC interrupts these days.
> >
> > Well lots of emulated devices don't yet.
> > They probably should but it's nice to be able to
> > test with e.g. e1000 emulation not just virtio.
> 
> 
> e1000 doesn't support msi?

qemu emulation doesn't.

> >
> > Besides, kvm_get_apic_interrupt
> > simply doesn't know about the triggering mode at the moment.
> >
> 
> 
> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 15:14           ` Michael S. Tsirkin
@ 2012-04-10 16:08             ` Avi Kivity
  2012-04-10 17:06               ` Michael S. Tsirkin
  0 siblings, 1 reply; 32+ messages in thread
From: Avi Kivity @ 2012-04-10 16:08 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On 04/10/2012 06:14 PM, Michael S. Tsirkin wrote:
> On Tue, Apr 10, 2012 at 06:00:51PM +0300, Avi Kivity wrote:
> > On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote:
> > > > >
> > > > > Yes. But we can and it's easier than figuring out priorities.
> > > > > I am guessing such collisions are rare, right?
> > > > 
> > > > It's pretty easy, if there is something in IRR but
> > > > kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance.
> > >
> > > I only see kvm_apic_has_interrupt - is this what you mean?
> > 
> > Yes, sorry.
> > 
> > It's not clear whether to do the check in kvm_apic_has_interrupt() or
> > kvm_apic_get_interrupt() - the latter is called only after interrupts
> > are enabled, so it looks like a better place (EOIs while interrupts are
> > disabled have no effect).  But need to make sure those functions are
> > actually called, since they're protected by KVM_REQ_EVENT.
>
> Sorry not sure what you mean by "make sure" - read the code carefully?

Yes.  And I mean, get called at the right time.

> > 
> > Better to keep everything per-cpu.  The code is in virt/kvm/ioapic.c
>
> Hmm. Disabling for level handles the ack notifiers
> issue as well, which I forgot about.
> It's a tough call. You think looking at
> TMR in kvm_get_apic_interrupt is safe?

Yes, it's read only from the guest point of view IIRC.

> > >
> > > > Why do we care about
> > > > level-triggered interrupts?  Everything uses MSI or edge-triggered
> > > > IOAPIC interrupts these days.
> > >
> > > Well lots of emulated devices don't yet.
> > > They probably should but it's nice to be able to
> > > test with e.g. e1000 emulation not just virtio.
> > 
> > 
> > e1000 doesn't support msi?
>
> qemu emulation doesn't.
>

Can be changed if someone's really interested.  But really, avoiding
EOIs for e1000 won't help it much.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 16:08             ` Avi Kivity
@ 2012-04-10 17:06               ` Michael S. Tsirkin
  0 siblings, 0 replies; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-10 17:06 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm

On Tue, Apr 10, 2012 at 07:08:26PM +0300, Avi Kivity wrote:
> On 04/10/2012 06:14 PM, Michael S. Tsirkin wrote:
> > On Tue, Apr 10, 2012 at 06:00:51PM +0300, Avi Kivity wrote:
> > > On 04/10/2012 05:53 PM, Michael S. Tsirkin wrote:
> > > > > >
> > > > > > Yes. But we can and it's easier than figuring out priorities.
> > > > > > I am guessing such collisions are rare, right?
> > > > > 
> > > > > It's pretty easy, if there is something in IRR but
> > > > > kvm_lapic_has_interrupt() returns -1, then we need to disable eoi avoidance.
> > > >
> > > > I only see kvm_apic_has_interrupt - is this what you mean?
> > > 
> > > Yes, sorry.
> > > 
> > > It's not clear whether to do the check in kvm_apic_has_interrupt() or
> > > kvm_apic_get_interrupt() - the latter is called only after interrupts
> > > are enabled, so it looks like a better place (EOIs while interrupts are
> > > disabled have no effect).  But need to make sure those functions are
> > > actually called, since they're protected by KVM_REQ_EVENT.
> >
> > Sorry not sure what you mean by "make sure" - read the code carefully?
> 
> Yes.  And I mean, get called at the right time.

OK, Review will help here.

> > > 
> > > Better to keep everything per-cpu.  The code is in virt/kvm/ioapic.c
> >
> > Hmm. Disabling for level handles the ack notifiers
> > issue as well, which I forgot about.
> > It's a tough call. You think looking at
> > TMR in kvm_get_apic_interrupt is safe?
> 
> Yes, it's read only from the guest point of view IIRC.
> 
> > > >
> > > > > Why do we care about
> > > > > level-triggered interrupts?  Everything uses MSI or edge-triggered
> > > > > IOAPIC interrupts these days.
> > > >
> > > > Well lots of emulated devices don't yet.
> > > > They probably should but it's nice to be able to
> > > > test with e.g. e1000 emulation not just virtio.
> > > 
> > > 
> > > e1000 doesn't support msi?
> >
> > qemu emulation doesn't.
> >
> 
> Can be changed if someone's really interested.  But really, avoiding
> EOIs for e1000 won't help it much.

It will help test EOI avoidance.

> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 14:26   ` Michael S. Tsirkin
  2012-04-10 14:33     ` Avi Kivity
@ 2012-04-10 17:59     ` Gleb Natapov
  2012-04-10 19:30       ` Michael S. Tsirkin
  1 sibling, 1 reply; 32+ messages in thread
From: Gleb Natapov @ 2012-04-10 17:59 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Avi Kivity, kvm

Heh, I was working on that too.

On Tue, Apr 10, 2012 at 05:26:18PM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote:
> > On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
> > > I took a stub at implementing PV EOI using shared memory.
> > > This should reduce the number of exits an interrupt
> > > causes as much as by half.
> > >
> > > A partially complete draft for both host and guest parts
> > > is below.
> > >
> > > The idea is simple: there's a bit, per APIC, in guest memory,
> > > that tells the guest that it does not need EOI.
> > > We set it before injecting an interrupt and clear
> > > before injecting a nested one. Guest tests it using
> > > a test and clear operation - this is necessary
> > > so that host can detect interrupt nesting -
> > > and if set, it can skip the EOI MSR.
> > >
> > > There's a new MSR to set the address of said register
> > > in guest memory. Otherwise not much changes:
> > > - Guest EOI is not required
> > > - ISR is automatically cleared before injection
> > >
> > > Some things are incomplete: add feature negotiation
> > > options, qemu support for said options.
> > > No testing was done beyond compiling the kernel.
> > >
> > > I would appreciate early feedback.
> > >
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > >
> > > --
> > >
> > > diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
> > > index d854101..8430f41 100644
> > > --- a/arch/x86/include/asm/apic.h
> > > +++ b/arch/x86/include/asm/apic.h
> > > @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
> > >  
> > >  #endif /* CONFIG_X86_LOCAL_APIC */
> > >  
> > > +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
> > > +
> > >  static inline void ack_APIC_irq(void)
> > >  {
> > > +	if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi)))
> > > +		return;
> > > +
> > 
> > While __test_and_clear_bit() is implemented in a single instruction,
> > it's not required to be.  Better have the instruction there explicitly.
> > 
> > >  	/*
> > >  	 * ack_APIC_irq() actually gets compiled as a single instruction
> > >  	 * ... yummie.
> > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > index e216ba0..0ee1472 100644
> > > --- a/arch/x86/include/asm/kvm_host.h
> > > +++ b/arch/x86/include/asm/kvm_host.h
> > > @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
> > >  		u64 length;
> > >  		u64 status;
> > >  	} osvw;
> > > +
> > > +	struct {
> > > +		u64 msr_val;
> > > +		struct gfn_to_hva_cache data;
> > > +		int vector;
> > > +	} eoi;
> > >  };
> > 
> > Needs to be cleared on INIT.
> 
> You mean kvm_arch_vcpu_reset?
> 
> > >  
> > >
> > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
> > >  		       smp_processor_id());
> > >  	}
> > >  
> > > +	wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
> > > +	       MSR_KVM_EOI_ENABLED);
> > > +
> > 
> > Clear on kexec.
> 
> With register_reboot_notifier?
> 
> > >  	if (has_steal_clock)
> > >  		kvm_register_steal_time();
> > >  }
> > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > > index 8584322..9e38e12 100644
> > > --- a/arch/x86/kvm/lapic.c
> > > +++ b/arch/x86/kvm/lapic.c
> > > @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
> > >  			irq->level, irq->trig_mode);
> > >  }
> > >  
> > > -static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > > +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
> > > +{
> > > +
> > > +	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val,
> > > +				      sizeof(val));
> > > +}
> > > +
> > > +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
> > > +{
> > > +
> > > +	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val,
> > > +				      sizeof(*val));
> > > +}
> > > +
> > > +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
> > > +{
> > > +	return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED);
> > > +}
> > > +
> > > +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
> > > +{
> > > +	u32 val;
> > > +	if (eoi_get_user(vcpu, &val) < 0)
> > > +		apic_debug("Can't read EOI MSR value: 0x%llx\n",
> > > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > > +	if (!(val & 0x1))
> > > +		vcpu->arch.eoi.vector = -1;
> > > +	return vcpu->arch.eoi.vector;
> > > +}
> > > +
> > > +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
> > > +{
> > > +	BUG_ON(vcpu->arch.eoi.vector != -1);
> > > +	if (eoi_put_user(vcpu, 0x1) < 0) {
> > > +		apic_debug("Can't set EOI MSR value: 0x%llx\n",
> > > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > > +		return;
> > > +	}
> > > +	vcpu->arch.eoi.vector = vector;
> > > +}
> > > +
> > > +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
> > > +{
> > > +	int vector;
> > > +	vector = vcpu->arch.eoi.vector;
> > > +	if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) {
> > > +		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
> > > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > > +		return -1;
> > > +	}
> > > +	vcpu->arch.eoi.vector = -1;
> > > +	return vector;
> > > +}
> > 
> > 
> > 
> > > +
> > > +static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
> > >  {
> > >  	int result;
> > >  
> > > @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > >  	return result;
> > >  }
> > >  
> > > +static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > > +{
> > > +	int vector;
> > > +	if (eoi_enabled(apic->vcpu)) {
> > > +		vector = eoi_get_pending_vector(apic->vcpu);
> > > +		if (vector != -1)
> > > +			return vector;
> > > +	}
> > > +	return __apic_find_highest_isr(apic);
> > > +}
> > 
> > Why aren't you modifying the ISR unconfitionally?
> 
> ISR is not set if there won't be an EOI
> since it's EOI that normally clears it.
> 
> > > +
> > >  static void apic_update_ppr(struct kvm_lapic *apic)
> > >  {
> > >  	u32 tpr, isrv, ppr, old_ppr;
> > > @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
> > >  	if (vector == -1)
> > >  		return;
> > >  
> > > +	if (eoi_enabled(apic->vcpu))
> > > +		eoi_clr_pending_vector(apic->vcpu);
> > >  	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > >  	apic_update_ppr(apic);
> > >  
> > > @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
> > >  {
> > >  	int vector = kvm_apic_has_interrupt(vcpu);
> > >  	struct kvm_lapic *apic = vcpu->arch.apic;
> > > +	bool set_isr = true;
> > >  
> > >  	if (vector == -1)
> > >  		return -1;
> > >  
> > > -	apic_set_vector(vector, apic->regs + APIC_ISR);
> > > +	if (eoi_enabled(vcpu)) {
> > > +		/* Anything pending? If yes disable eoi optimization. */
> > > +		if (unlikely(apic_find_highest_isr(apic) >= 0)) {
> > > +			int v = eoi_clr_pending_vector(vcpu);
> > 
> > ISR != pending, that's IRR.  If ISR vector has lower priority than the
> > new vector, then we don't need to disable eoi avoidance.
> 
> Yes. But we can and it's easier than figuring out priorities.
> I am guessing such collisions are rare, right?
> I'll add a trace to make sure.
> 
> > > +			if (v != -1)
> > > +				apic_set_vector(v, apic->regs + APIC_ISR);
> > > +		} else {
> > > +			eoi_set_pending_vector(vcpu, vector);
> > > +			set_isr = false;
> > 
> > Weird.  Just set it normally.  Remember that reading the ISR needs to
> > return the correct value.
> 
> Marcelo said linux does not normally read ISR - not true?
> Note this has no effect if the PV optimization is not enabled.
> 
> > We need to process the avoided EOI before any APIC read/writes, to be
> > sure the guest sees the updated values.  Same for IOAPIC, EOI affects
> > remote_irr.  That may been we need to sample it after every exit, or
> > perhaps disable the feature for level-triggered interrupts.
> 
> Disabling would be very sad.  Can we sample on remote irr read?
> 
Nothing sad about it, just correct. MS requires this to be disabled for
level triggered interrupts. We have to notify IOAPIC about EOI ASAP. It
may hold another interrupt for us that has to be delivered.

I was going to avoid most of the trickery in apic code and just check if
avoided EOI should be processed on each exit. This adds one if on exit
path instead of couple on interrupt injection path though.

> > > +		}
> > > +	}
> > > +
> > > +	if (set_isr)
> > > +		apic_set_vector(vector, apic->regs + APIC_ISR);
> > >  	apic_update_ppr(apic);
> > >  	apic_clear_irr(vector, apic);
> > >  	return vector;
> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > >
> > 
> > -- 
> > error compiling committee.c: too many arguments to function
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 17:59     ` Gleb Natapov
@ 2012-04-10 19:30       ` Michael S. Tsirkin
  2012-04-10 19:33         ` Gleb Natapov
  0 siblings, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-10 19:30 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Avi Kivity, kvm

On Tue, Apr 10, 2012 at 08:59:21PM +0300, Gleb Natapov wrote:
> Heh, I was working on that too.
> 
> On Tue, Apr 10, 2012 at 05:26:18PM +0300, Michael S. Tsirkin wrote:
> > On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote:
> > > On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
> > > > I took a stub at implementing PV EOI using shared memory.
> > > > This should reduce the number of exits an interrupt
> > > > causes as much as by half.
> > > >
> > > > A partially complete draft for both host and guest parts
> > > > is below.
> > > >
> > > > The idea is simple: there's a bit, per APIC, in guest memory,
> > > > that tells the guest that it does not need EOI.
> > > > We set it before injecting an interrupt and clear
> > > > before injecting a nested one. Guest tests it using
> > > > a test and clear operation - this is necessary
> > > > so that host can detect interrupt nesting -
> > > > and if set, it can skip the EOI MSR.
> > > >
> > > > There's a new MSR to set the address of said register
> > > > in guest memory. Otherwise not much changes:
> > > > - Guest EOI is not required
> > > > - ISR is automatically cleared before injection
> > > >
> > > > Some things are incomplete: add feature negotiation
> > > > options, qemu support for said options.
> > > > No testing was done beyond compiling the kernel.
> > > >
> > > > I would appreciate early feedback.
> > > >
> > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > >
> > > > --
> > > >
> > > > diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
> > > > index d854101..8430f41 100644
> > > > --- a/arch/x86/include/asm/apic.h
> > > > +++ b/arch/x86/include/asm/apic.h
> > > > @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
> > > >  
> > > >  #endif /* CONFIG_X86_LOCAL_APIC */
> > > >  
> > > > +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
> > > > +
> > > >  static inline void ack_APIC_irq(void)
> > > >  {
> > > > +	if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi)))
> > > > +		return;
> > > > +
> > > 
> > > While __test_and_clear_bit() is implemented in a single instruction,
> > > it's not required to be.  Better have the instruction there explicitly.
> > > 
> > > >  	/*
> > > >  	 * ack_APIC_irq() actually gets compiled as a single instruction
> > > >  	 * ... yummie.
> > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > > index e216ba0..0ee1472 100644
> > > > --- a/arch/x86/include/asm/kvm_host.h
> > > > +++ b/arch/x86/include/asm/kvm_host.h
> > > > @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
> > > >  		u64 length;
> > > >  		u64 status;
> > > >  	} osvw;
> > > > +
> > > > +	struct {
> > > > +		u64 msr_val;
> > > > +		struct gfn_to_hva_cache data;
> > > > +		int vector;
> > > > +	} eoi;
> > > >  };
> > > 
> > > Needs to be cleared on INIT.
> > 
> > You mean kvm_arch_vcpu_reset?
> > 
> > > >  
> > > >
> > > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
> > > >  		       smp_processor_id());
> > > >  	}
> > > >  
> > > > +	wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
> > > > +	       MSR_KVM_EOI_ENABLED);
> > > > +
> > > 
> > > Clear on kexec.
> > 
> > With register_reboot_notifier?
> > 
> > > >  	if (has_steal_clock)
> > > >  		kvm_register_steal_time();
> > > >  }
> > > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > > > index 8584322..9e38e12 100644
> > > > --- a/arch/x86/kvm/lapic.c
> > > > +++ b/arch/x86/kvm/lapic.c
> > > > @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
> > > >  			irq->level, irq->trig_mode);
> > > >  }
> > > >  
> > > > -static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > > > +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
> > > > +{
> > > > +
> > > > +	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val,
> > > > +				      sizeof(val));
> > > > +}
> > > > +
> > > > +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
> > > > +{
> > > > +
> > > > +	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val,
> > > > +				      sizeof(*val));
> > > > +}
> > > > +
> > > > +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
> > > > +{
> > > > +	return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED);
> > > > +}
> > > > +
> > > > +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
> > > > +{
> > > > +	u32 val;
> > > > +	if (eoi_get_user(vcpu, &val) < 0)
> > > > +		apic_debug("Can't read EOI MSR value: 0x%llx\n",
> > > > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > > > +	if (!(val & 0x1))
> > > > +		vcpu->arch.eoi.vector = -1;
> > > > +	return vcpu->arch.eoi.vector;
> > > > +}
> > > > +
> > > > +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
> > > > +{
> > > > +	BUG_ON(vcpu->arch.eoi.vector != -1);
> > > > +	if (eoi_put_user(vcpu, 0x1) < 0) {
> > > > +		apic_debug("Can't set EOI MSR value: 0x%llx\n",
> > > > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > > > +		return;
> > > > +	}
> > > > +	vcpu->arch.eoi.vector = vector;
> > > > +}
> > > > +
> > > > +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
> > > > +{
> > > > +	int vector;
> > > > +	vector = vcpu->arch.eoi.vector;
> > > > +	if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) {
> > > > +		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
> > > > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > > > +		return -1;
> > > > +	}
> > > > +	vcpu->arch.eoi.vector = -1;
> > > > +	return vector;
> > > > +}
> > > 
> > > 
> > > 
> > > > +
> > > > +static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
> > > >  {
> > > >  	int result;
> > > >  
> > > > @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > > >  	return result;
> > > >  }
> > > >  
> > > > +static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > > > +{
> > > > +	int vector;
> > > > +	if (eoi_enabled(apic->vcpu)) {
> > > > +		vector = eoi_get_pending_vector(apic->vcpu);
> > > > +		if (vector != -1)
> > > > +			return vector;
> > > > +	}
> > > > +	return __apic_find_highest_isr(apic);
> > > > +}
> > > 
> > > Why aren't you modifying the ISR unconfitionally?
> > 
> > ISR is not set if there won't be an EOI
> > since it's EOI that normally clears it.
> > 
> > > > +
> > > >  static void apic_update_ppr(struct kvm_lapic *apic)
> > > >  {
> > > >  	u32 tpr, isrv, ppr, old_ppr;
> > > > @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
> > > >  	if (vector == -1)
> > > >  		return;
> > > >  
> > > > +	if (eoi_enabled(apic->vcpu))
> > > > +		eoi_clr_pending_vector(apic->vcpu);
> > > >  	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > > >  	apic_update_ppr(apic);
> > > >  
> > > > @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
> > > >  {
> > > >  	int vector = kvm_apic_has_interrupt(vcpu);
> > > >  	struct kvm_lapic *apic = vcpu->arch.apic;
> > > > +	bool set_isr = true;
> > > >  
> > > >  	if (vector == -1)
> > > >  		return -1;
> > > >  
> > > > -	apic_set_vector(vector, apic->regs + APIC_ISR);
> > > > +	if (eoi_enabled(vcpu)) {
> > > > +		/* Anything pending? If yes disable eoi optimization. */
> > > > +		if (unlikely(apic_find_highest_isr(apic) >= 0)) {
> > > > +			int v = eoi_clr_pending_vector(vcpu);
> > > 
> > > ISR != pending, that's IRR.  If ISR vector has lower priority than the
> > > new vector, then we don't need to disable eoi avoidance.
> > 
> > Yes. But we can and it's easier than figuring out priorities.
> > I am guessing such collisions are rare, right?
> > I'll add a trace to make sure.
> > 
> > > > +			if (v != -1)
> > > > +				apic_set_vector(v, apic->regs + APIC_ISR);
> > > > +		} else {
> > > > +			eoi_set_pending_vector(vcpu, vector);
> > > > +			set_isr = false;
> > > 
> > > Weird.  Just set it normally.  Remember that reading the ISR needs to
> > > return the correct value.
> > 
> > Marcelo said linux does not normally read ISR - not true?
> > Note this has no effect if the PV optimization is not enabled.
> > 
> > > We need to process the avoided EOI before any APIC read/writes, to be
> > > sure the guest sees the updated values.  Same for IOAPIC, EOI affects
> > > remote_irr.  That may been we need to sample it after every exit, or
> > > perhaps disable the feature for level-triggered interrupts.
> > 
> > Disabling would be very sad.  Can we sample on remote irr read?
> > 
> Nothing sad about it, just correct. MS requires this to be disabled for
> level triggered interrupts.

We don't try to match what HV does 100% anyway.

> We have to notify IOAPIC about EOI ASAP. It
> may hold another interrupt for us that has to be delivered.

You mean the ack notifiers? We can skip just for the vectors
which have ack notifiers or only if there are no notifiers.

> I was going to avoid most of the trickery in apic code and just check if
> avoided EOI should be processed on each exit. This adds one if on exit
> path instead of couple on interrupt injection path though.
> 
> > > > +		}
> > > > +	}
> > > > +
> > > > +	if (set_isr)
> > > > +		apic_set_vector(vector, apic->regs + APIC_ISR);
> > > >  	apic_update_ppr(apic);
> > > >  	apic_clear_irr(vector, apic);
> > > >  	return vector;
> > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > >
> > > 
> > > -- 
> > > error compiling committee.c: too many arguments to function
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 19:30       ` Michael S. Tsirkin
@ 2012-04-10 19:33         ` Gleb Natapov
  2012-04-10 19:40           ` Michael S. Tsirkin
  0 siblings, 1 reply; 32+ messages in thread
From: Gleb Natapov @ 2012-04-10 19:33 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Avi Kivity, kvm

On Tue, Apr 10, 2012 at 10:30:04PM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 10, 2012 at 08:59:21PM +0300, Gleb Natapov wrote:
> > Heh, I was working on that too.
> > 
> > On Tue, Apr 10, 2012 at 05:26:18PM +0300, Michael S. Tsirkin wrote:
> > > On Tue, Apr 10, 2012 at 05:03:22PM +0300, Avi Kivity wrote:
> > > > On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
> > > > > I took a stub at implementing PV EOI using shared memory.
> > > > > This should reduce the number of exits an interrupt
> > > > > causes as much as by half.
> > > > >
> > > > > A partially complete draft for both host and guest parts
> > > > > is below.
> > > > >
> > > > > The idea is simple: there's a bit, per APIC, in guest memory,
> > > > > that tells the guest that it does not need EOI.
> > > > > We set it before injecting an interrupt and clear
> > > > > before injecting a nested one. Guest tests it using
> > > > > a test and clear operation - this is necessary
> > > > > so that host can detect interrupt nesting -
> > > > > and if set, it can skip the EOI MSR.
> > > > >
> > > > > There's a new MSR to set the address of said register
> > > > > in guest memory. Otherwise not much changes:
> > > > > - Guest EOI is not required
> > > > > - ISR is automatically cleared before injection
> > > > >
> > > > > Some things are incomplete: add feature negotiation
> > > > > options, qemu support for said options.
> > > > > No testing was done beyond compiling the kernel.
> > > > >
> > > > > I would appreciate early feedback.
> > > > >
> > > > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> > > > >
> > > > > --
> > > > >
> > > > > diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
> > > > > index d854101..8430f41 100644
> > > > > --- a/arch/x86/include/asm/apic.h
> > > > > +++ b/arch/x86/include/asm/apic.h
> > > > > @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
> > > > >  
> > > > >  #endif /* CONFIG_X86_LOCAL_APIC */
> > > > >  
> > > > > +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
> > > > > +
> > > > >  static inline void ack_APIC_irq(void)
> > > > >  {
> > > > > +	if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi)))
> > > > > +		return;
> > > > > +
> > > > 
> > > > While __test_and_clear_bit() is implemented in a single instruction,
> > > > it's not required to be.  Better have the instruction there explicitly.
> > > > 
> > > > >  	/*
> > > > >  	 * ack_APIC_irq() actually gets compiled as a single instruction
> > > > >  	 * ... yummie.
> > > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > > > > index e216ba0..0ee1472 100644
> > > > > --- a/arch/x86/include/asm/kvm_host.h
> > > > > +++ b/arch/x86/include/asm/kvm_host.h
> > > > > @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
> > > > >  		u64 length;
> > > > >  		u64 status;
> > > > >  	} osvw;
> > > > > +
> > > > > +	struct {
> > > > > +		u64 msr_val;
> > > > > +		struct gfn_to_hva_cache data;
> > > > > +		int vector;
> > > > > +	} eoi;
> > > > >  };
> > > > 
> > > > Needs to be cleared on INIT.
> > > 
> > > You mean kvm_arch_vcpu_reset?
> > > 
> > > > >  
> > > > >
> > > > > @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
> > > > >  		       smp_processor_id());
> > > > >  	}
> > > > >  
> > > > > +	wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
> > > > > +	       MSR_KVM_EOI_ENABLED);
> > > > > +
> > > > 
> > > > Clear on kexec.
> > > 
> > > With register_reboot_notifier?
> > > 
> > > > >  	if (has_steal_clock)
> > > > >  		kvm_register_steal_time();
> > > > >  }
> > > > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > > > > index 8584322..9e38e12 100644
> > > > > --- a/arch/x86/kvm/lapic.c
> > > > > +++ b/arch/x86/kvm/lapic.c
> > > > > @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
> > > > >  			irq->level, irq->trig_mode);
> > > > >  }
> > > > >  
> > > > > -static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > > > > +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
> > > > > +{
> > > > > +
> > > > > +	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val,
> > > > > +				      sizeof(val));
> > > > > +}
> > > > > +
> > > > > +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
> > > > > +{
> > > > > +
> > > > > +	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val,
> > > > > +				      sizeof(*val));
> > > > > +}
> > > > > +
> > > > > +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
> > > > > +{
> > > > > +	return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED);
> > > > > +}
> > > > > +
> > > > > +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
> > > > > +{
> > > > > +	u32 val;
> > > > > +	if (eoi_get_user(vcpu, &val) < 0)
> > > > > +		apic_debug("Can't read EOI MSR value: 0x%llx\n",
> > > > > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > > > > +	if (!(val & 0x1))
> > > > > +		vcpu->arch.eoi.vector = -1;
> > > > > +	return vcpu->arch.eoi.vector;
> > > > > +}
> > > > > +
> > > > > +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
> > > > > +{
> > > > > +	BUG_ON(vcpu->arch.eoi.vector != -1);
> > > > > +	if (eoi_put_user(vcpu, 0x1) < 0) {
> > > > > +		apic_debug("Can't set EOI MSR value: 0x%llx\n",
> > > > > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > > > > +		return;
> > > > > +	}
> > > > > +	vcpu->arch.eoi.vector = vector;
> > > > > +}
> > > > > +
> > > > > +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
> > > > > +{
> > > > > +	int vector;
> > > > > +	vector = vcpu->arch.eoi.vector;
> > > > > +	if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) {
> > > > > +		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
> > > > > +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> > > > > +		return -1;
> > > > > +	}
> > > > > +	vcpu->arch.eoi.vector = -1;
> > > > > +	return vector;
> > > > > +}
> > > > 
> > > > 
> > > > 
> > > > > +
> > > > > +static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
> > > > >  {
> > > > >  	int result;
> > > > >  
> > > > > @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > > > >  	return result;
> > > > >  }
> > > > >  
> > > > > +static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> > > > > +{
> > > > > +	int vector;
> > > > > +	if (eoi_enabled(apic->vcpu)) {
> > > > > +		vector = eoi_get_pending_vector(apic->vcpu);
> > > > > +		if (vector != -1)
> > > > > +			return vector;
> > > > > +	}
> > > > > +	return __apic_find_highest_isr(apic);
> > > > > +}
> > > > 
> > > > Why aren't you modifying the ISR unconfitionally?
> > > 
> > > ISR is not set if there won't be an EOI
> > > since it's EOI that normally clears it.
> > > 
> > > > > +
> > > > >  static void apic_update_ppr(struct kvm_lapic *apic)
> > > > >  {
> > > > >  	u32 tpr, isrv, ppr, old_ppr;
> > > > > @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
> > > > >  	if (vector == -1)
> > > > >  		return;
> > > > >  
> > > > > +	if (eoi_enabled(apic->vcpu))
> > > > > +		eoi_clr_pending_vector(apic->vcpu);
> > > > >  	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > > > >  	apic_update_ppr(apic);
> > > > >  
> > > > > @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
> > > > >  {
> > > > >  	int vector = kvm_apic_has_interrupt(vcpu);
> > > > >  	struct kvm_lapic *apic = vcpu->arch.apic;
> > > > > +	bool set_isr = true;
> > > > >  
> > > > >  	if (vector == -1)
> > > > >  		return -1;
> > > > >  
> > > > > -	apic_set_vector(vector, apic->regs + APIC_ISR);
> > > > > +	if (eoi_enabled(vcpu)) {
> > > > > +		/* Anything pending? If yes disable eoi optimization. */
> > > > > +		if (unlikely(apic_find_highest_isr(apic) >= 0)) {
> > > > > +			int v = eoi_clr_pending_vector(vcpu);
> > > > 
> > > > ISR != pending, that's IRR.  If ISR vector has lower priority than the
> > > > new vector, then we don't need to disable eoi avoidance.
> > > 
> > > Yes. But we can and it's easier than figuring out priorities.
> > > I am guessing such collisions are rare, right?
> > > I'll add a trace to make sure.
> > > 
> > > > > +			if (v != -1)
> > > > > +				apic_set_vector(v, apic->regs + APIC_ISR);
> > > > > +		} else {
> > > > > +			eoi_set_pending_vector(vcpu, vector);
> > > > > +			set_isr = false;
> > > > 
> > > > Weird.  Just set it normally.  Remember that reading the ISR needs to
> > > > return the correct value.
> > > 
> > > Marcelo said linux does not normally read ISR - not true?
> > > Note this has no effect if the PV optimization is not enabled.
> > > 
> > > > We need to process the avoided EOI before any APIC read/writes, to be
> > > > sure the guest sees the updated values.  Same for IOAPIC, EOI affects
> > > > remote_irr.  That may been we need to sample it after every exit, or
> > > > perhaps disable the feature for level-triggered interrupts.
> > > 
> > > Disabling would be very sad.  Can we sample on remote irr read?
> > > 
> > Nothing sad about it, just correct. MS requires this to be disabled for
> > level triggered interrupts.
> 
> We don't try to match what HV does 100% anyway.
> 
We should. The same code will be used for HV.

> > We have to notify IOAPIC about EOI ASAP. It
> > may hold another interrupt for us that has to be delivered.
> 
> You mean the ack notifiers? We can skip just for the vectors
> which have ack notifiers or only if there are no notifiers.
> 
No. I mean:

                if (!ent->fields.mask && (ioapic->irr & (1 << i)))
                        ioapic_service(ioapic, i);

> > I was going to avoid most of the trickery in apic code and just check if
> > avoided EOI should be processed on each exit. This adds one if on exit
> > path instead of couple on interrupt injection path though.
> > 
> > > > > +		}
> > > > > +	}
> > > > > +
> > > > > +	if (set_isr)
> > > > > +		apic_set_vector(vector, apic->regs + APIC_ISR);
> > > > >  	apic_update_ppr(apic);
> > > > >  	apic_clear_irr(vector, apic);
> > > > >  	return vector;
> > > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > >
> > > > 
> > > > -- 
> > > > error compiling committee.c: too many arguments to function
> > > --
> > > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > > the body of a message to majordomo@vger.kernel.org
> > > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> > --
> > 			Gleb.

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 19:33         ` Gleb Natapov
@ 2012-04-10 19:40           ` Michael S. Tsirkin
  2012-04-10 19:42             ` Gleb Natapov
  0 siblings, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-10 19:40 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Avi Kivity, kvm

On Tue, Apr 10, 2012 at 10:33:54PM +0300, Gleb Natapov wrote:
> > We don't try to match what HV does 100% anyway.
> > 
> We should. The same code will be used for HV.

Only where it makes sense, that is where the functionality
is sufficiently similar.

> > > We have to notify IOAPIC about EOI ASAP. It
> > > may hold another interrupt for us that has to be delivered.
> > 
> > You mean the ack notifiers? We can skip just for the vectors
> > which have ack notifiers or only if there are no notifiers.
> > 
> No. I mean:
> 
>                 if (!ent->fields.mask && (ioapic->irr & (1 << i)))
>                         ioapic_service(ioapic, i);

Hmm.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 19:40           ` Michael S. Tsirkin
@ 2012-04-10 19:42             ` Gleb Natapov
  0 siblings, 0 replies; 32+ messages in thread
From: Gleb Natapov @ 2012-04-10 19:42 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Avi Kivity, kvm

On Tue, Apr 10, 2012 at 10:40:14PM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 10, 2012 at 10:33:54PM +0300, Gleb Natapov wrote:
> > > We don't try to match what HV does 100% anyway.
> > > 
> > We should. The same code will be used for HV.
> 
> Only where it makes sense, that is where the functionality
> is sufficiently similar.
> 
You can sprinkle additional ifs in the code, but I do not see the point.

> > > > We have to notify IOAPIC about EOI ASAP. It
> > > > may hold another interrupt for us that has to be delivered.
> > > 
> > > You mean the ack notifiers? We can skip just for the vectors
> > > which have ack notifiers or only if there are no notifiers.
> > > 
> > No. I mean:
> > 
> >                 if (!ent->fields.mask && (ioapic->irr & (1 << i)))
> >                         ioapic_service(ioapic, i);
> 
> Hmm.

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-10 13:27 [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory Michael S. Tsirkin
  2012-04-10 14:03 ` Avi Kivity
@ 2012-04-15 16:18 ` Michael S. Tsirkin
  2012-04-16 10:08   ` Gleb Natapov
  1 sibling, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-15 16:18 UTC (permalink / raw)
  To: kvm

I got lots of useful feedback from v0 so I thought
sending out a brain dump again would be a good idea.
This is mainly to show how I'm trying to address the
comments I got from the previous round. Flames/feedback
are wellcome!

Changes from v0:
- Tweaked setup MSRs a bit
- Keep ISR bit set. Before reading ISR, test EOI in guest memory and clear
- Check priority for nested interrupts, we can
  enable optimization if new one is high priority
- Disable optimization for any interrupt handled by ioapic
  (This is because ioapic handles notifiers and pic and it generally gets messy.
   It's possible that we can optimize some ioapic-handled
   edge interrupts - but is it worth it?)
- A less intrusive change for guest apic (0 overhead without kvm)

---

I took a stub at implementing PV EOI using shared memory.
This should reduce the number of exits an interrupt
causes as much as by half.

A partially complete draft for both host and guest parts
is below.

The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
We set it before injecting an interrupt and clear
before injecting a nested one. Guest tests it using
a test and clear operation - this is necessary
so that host can detect interrupt nesting -
and if set, it can skip the EOI MSR.

There's a new MSR to set the address of said register
in guest memory. Otherwise not much changed:
- Guest EOI is not required
- Register is tested ISR is automatically cleared before injection
 
qemu support is incomplete - mostly for feature negotiation.
need to add some trace points to enable profiling.
No testing was done beyond compiling the kernel.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index b97596e..c9c70ea 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -26,11 +26,13 @@
 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
 /* Technically wrong, but this avoids compilation errors on some gcc
    versions. */
-#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
+#define BITOP_ADDR_CONSTRAINT "=m"
 #else
-#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
+#define BITOP_ADDR_CONSTRAINT "+m"
 #endif
 
+#define BITOP_ADDR(x) BITOP_ADDR_CONSTRAINT (*(volatile long *) (x))
+
 #define ADDR				BITOP_ADDR(addr)
 
 /*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e216ba0..3d09ef1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
 		u64 length;
 		u64 status;
 	} osvw;
+
+	struct {
+		u64 msr_val;
+		struct gfn_to_hva_cache data;
+		bool pending;
+	} eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 734c376..164376a 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -22,6 +22,7 @@
 #define KVM_FEATURE_CLOCKSOURCE2        3
 #define KVM_FEATURE_ASYNC_PF		4
 #define KVM_FEATURE_STEAL_TIME		5
+#define KVM_FEATURE_EOI			6
 
 /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
@@ -37,6 +38,8 @@
 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 #define MSR_KVM_STEAL_TIME  0x4b564d03
+#define MSR_KVM_EOI_EN      0x4b564d04
+#define MSR_KVM_EOI_DISABLED 0x0L
 
 struct kvm_steal_time {
 	__u64 steal;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b8ba6e4..450aae4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -39,6 +39,7 @@
 #include <asm/desc.h>
 #include <asm/tlbflush.h>
 #include <asm/idle.h>
+#include <asm/apic.h>
 
 static int kvmapf = 1;
 
@@ -290,6 +291,33 @@ static void kvm_register_steal_time(void)
 		cpu, __pa(st));
 }
 
+/* TODO: needs to be early? aligned? */
+static DEFINE_EARLY_PER_CPU(u8, apic_eoi, 0);
+
+/* Our own copy of __test_and_clear_bit to make sure
+ * it is done with a single instruction */
+static inline int kvm_test_and_clear_bit(int nr, volatile u8* addr)
+{
+	int oldbit;
+
+	asm volatile("btr %2,%1\n\t"
+		     "sbb %0,%0"
+		     : "=r" (oldbit),
+		     BITOP_ADDR_CONSTRAINT (*(volatile u8 *) (addr))
+		     : "Ir" (nr));
+	return oldbit;
+}
+
+static void (*kvm_guest_native_apic_write)(u32 reg, u32 val);
+static void kvm_guest_apic_write(u32 reg, u32 val)
+{
+	if (reg == APIC_EOI &&
+	    kvm_test_and_clear_bit(0, &__get_cpu_var(apic_eoi)))
+			return;
+
+	kvm_guest_native_apic_write(reg, val);
+}
+
 void __cpuinit kvm_guest_cpu_init(void)
 {
 	if (!kvm_para_available())
@@ -307,11 +335,18 @@ void __cpuinit kvm_guest_cpu_init(void)
 		       smp_processor_id());
 	}
 
+	
+	if (kvm_para_has_feature(KVM_FEATURE_EOI)) {
+		kvm_guest_native_apic_write = apic->write;
+		apic->write = kvm_guest_apic_write;
+		wrmsrl(MSR_KVM_EOI_EN, __pa(&__get_cpu_var(apic_eoi)));
+	}
+
 	if (has_steal_clock)
 		kvm_register_steal_time();
 }
 
-static void kvm_pv_disable_apf(void *unused)
+static void kvm_pv_disable_apf(void)
 {
 	if (!__get_cpu_var(apf_reason).enabled)
 		return;
@@ -323,11 +358,18 @@ static void kvm_pv_disable_apf(void *unused)
 	       smp_processor_id());
 }
 
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+	if (kvm_para_has_feature(KVM_FEATURE_EOI))
+		wrmsrl(MSR_KVM_EOI_EN, MSR_KVM_EOI_DISABLED);
+	kvm_pv_disable_apf();
+}
+
 static int kvm_pv_reboot_notify(struct notifier_block *nb,
 				unsigned long code, void *unused)
 {
 	if (code == SYS_RESTART)
-		on_each_cpu(kvm_pv_disable_apf, NULL, 1);
+		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
 	return NOTIFY_DONE;
 }
 
@@ -378,7 +420,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
 static void kvm_guest_cpu_offline(void *dummy)
 {
 	kvm_disable_steal_time();
-	kvm_pv_disable_apf(NULL);
+	if (kvm_para_has_feature(KVM_FEATURE_EOI))
+		wrmsrl(MSR_KVM_EOI_EN, MSR_KVM_EOI_DISABLED);
+	kvm_pv_disable_apf();
 	apf_task_wake_all();
 }
 
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9fed5be..7d00d2d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -408,6 +408,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
 			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
 			     (1 << KVM_FEATURE_ASYNC_PF) |
+			     (1 << KVM_FEATURE_EOI) |
 			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
 
 		if (sched_info_on())
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 7e06ba1..07bdfab 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -48,7 +48,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
 	if (!irqchip_in_kernel(v->kvm))
 		return v->arch.interrupt.pending;
 
-	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
+	if (kvm_apic_has_interrupt(v) < 0) {	/* LAPIC */
 		if (kvm_apic_accept_pic_intr(v)) {
 			s = pic_irqchip(v->kvm);	/* PIC */
 			return s->output;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 992b4ea..c2118ab 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -270,6 +270,54 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 			irq->level, irq->trig_mode);
 }
 
+static int eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
+{
+
+	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val,
+				      sizeof(val));
+}
+
+static int eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
+{
+
+	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val,
+				      sizeof(*val));
+}
+
+static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.eoi.msr_val != MSR_KVM_EOI_DISABLED;
+}
+
+static bool eoi_get_pending(struct kvm_vcpu *vcpu)
+{
+	u8 val;
+	if (eoi_get_user(vcpu, &val) < 0)
+		apic_debug("Can't read EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpi->arch.eoi.msr_val);
+	return val & 0x1;
+}
+
+static void eoi_set_pending(struct kvm_vcpu *vcpu)
+{
+	if (eoi_put_user(vcpu, 0x1) < 0) {
+		apic_debug("Can't set EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpi->arch.eoi.msr_val);
+		return;
+	}
+	vcpu->arch.eoi.pending = true;
+}
+
+static void eoi_clr_pending(struct kvm_vcpu *vcpu)
+{
+	if (eoi_put_user(vcpu, 0x0) < 0) {
+		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpi->arch.eoi.msr_val);
+		return;
+	}
+	vcpu->arch.eoi.pending = false;
+}
+
 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 {
 	int result;
@@ -280,6 +328,20 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 	return result;
 }
 
+static void apic_update_isr(struct kvm_lapic *apic)
+{
+	int vector;
+	if (!eoi_enabled(apic->vcpu) ||
+	    !apic->vcpu->arch.eoi.pending ||
+	    eoi_get_pending(apic->vcpu))
+		return;
+	apic->vcpu->arch.eoi.pending = false;
+	vector = apic_find_highest_isr(apic);
+	if (vector == -1)
+		return;
+	apic_clear_vector(vector, apic->regs + APIC_ISR);
+}
+
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
 	u32 tpr, isrv, ppr, old_ppr;
@@ -287,6 +349,7 @@ static void apic_update_ppr(struct kvm_lapic *apic)
 
 	old_ppr = apic_get_reg(apic, APIC_PROCPRI);
 	tpr = apic_get_reg(apic, APIC_TASKPRI);
+	apic_update_isr(apic);
 	isr = apic_find_highest_isr(apic);
 	isrv = (isr != -1) ? isr : 0;
 
@@ -492,6 +555,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
 	if (vector == -1)
 		return;
 
+	if (eoi_enabled(apic->vcpu))
+		eoi_clr_pending(apic->vcpu);
 	apic_clear_vector(vector, apic->regs + APIC_ISR);
 	apic_update_ppr(apic);
 
@@ -1085,6 +1150,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	atomic_set(&apic->lapic_timer.pending, 0);
 	if (kvm_vcpu_is_bsp(vcpu))
 		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
+	vcpu->arch.eoi.msr_val = MSR_KVM_EOI_DISABLED;
+	vcpu->arch.eoi.pending = false;
 	apic_update_ppr(apic);
 
 	vcpu->arch.apic_arb_prio = 0;
@@ -1210,9 +1277,10 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 
 	apic_update_ppr(apic);
 	highest_irr = apic_find_highest_irr(apic);
-	if ((highest_irr == -1) ||
-	    ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
+	if (highest_irr == -1)
 		return -1;
+	if (((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
+		return -2;
 	return highest_irr;
 }
 
@@ -1244,9 +1312,20 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 	int vector = kvm_apic_has_interrupt(vcpu);
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (vector == -1)
+	/* Detect interrupt nesting and disable EOI optimization */
+	if (eoi_enabled(vcpu) && vector == -2)
+		eoi_clr_pending(vcpu);
+
+	if (vector < 0)
 		return -1;
 
+	if (eoi_enabled(vcpu)) {
+		if (kvm_ioapic_handles_vector(vcpu->kvm, vector))
+			eoi_clr_pending(vcpu);
+		else
+			eoi_set_pending(vcpu);
+	}
+
 	apic_set_vector(vector, apic->regs + APIC_ISR);
 	apic_update_ppr(apic);
 	apic_clear_irr(vector, apic);
@@ -1261,6 +1340,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 			     MSR_IA32_APICBASE_BASE;
 	kvm_apic_set_version(vcpu);
 
+	if (eoi_enabled(apic->vcpu))
+	    apic->vcpu->arch.eoi.pending = eoi_get_pending(apic->vcpu);
 	apic_update_ppr(apic);
 	hrtimer_cancel(&apic->lapic_timer.timer);
 	update_divide_count(apic);
@@ -1392,3 +1473,18 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
 
 	return 0;
 }
+
+int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data)
+{
+	if (data == MSR_KVM_EOI_DISABLED) {
+		struct kvm_lapic *apic = vcpu->arch.apic;
+		if (apic && apic_enabled(apic))
+			apic_update_isr(apic);
+	} else if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data,
+					     data))
+		return 1;
+
+	vcpu->arch.eoi.msr_val = data;
+	vcpu->arch.eoi.pending = false;
+	return 0;
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6f4ce25..042dace 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -60,4 +60,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
 }
+
+int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data);
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4044ce0..31d6762 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1627,6 +1627,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		if (kvm_pv_enable_async_pf(vcpu, data))
 			return 1;
 		break;
+	case MSR_KVM_EOI_EN:
+		if (kvm_pv_enable_apic_eoi(vcpu, data))
+			return 1;
+		break;
 	case MSR_KVM_STEAL_TIME:
 
 		if (unlikely(!sched_info_on()))

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-15 16:18 ` [PATCHv1 " Michael S. Tsirkin
@ 2012-04-16 10:08   ` Gleb Natapov
  2012-04-16 11:09     ` Michael S. Tsirkin
  2012-04-17  9:22     ` Avi Kivity
  0 siblings, 2 replies; 32+ messages in thread
From: Gleb Natapov @ 2012-04-16 10:08 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On Sun, Apr 15, 2012 at 07:18:58PM +0300, Michael S. Tsirkin wrote:
> I got lots of useful feedback from v0 so I thought
> sending out a brain dump again would be a good idea.
> This is mainly to show how I'm trying to address the
> comments I got from the previous round. Flames/feedback
> are wellcome!
> 
> Changes from v0:
> - Tweaked setup MSRs a bit
> - Keep ISR bit set. Before reading ISR, test EOI in guest memory and clear
> - Check priority for nested interrupts, we can
>   enable optimization if new one is high priority
> - Disable optimization for any interrupt handled by ioapic
>   (This is because ioapic handles notifiers and pic and it generally gets messy.
>    It's possible that we can optimize some ioapic-handled
>    edge interrupts - but is it worth it?)
> - A less intrusive change for guest apic (0 overhead without kvm)
> 
> ---
> 
> I took a stub at implementing PV EOI using shared memory.
> This should reduce the number of exits an interrupt
> causes as much as by half.
> 
> A partially complete draft for both host and guest parts
> is below.
> 
> The idea is simple: there's a bit, per APIC, in guest memory,
> that tells the guest that it does not need EOI.
> We set it before injecting an interrupt and clear
> before injecting a nested one. Guest tests it using
> a test and clear operation - this is necessary
> so that host can detect interrupt nesting -
> and if set, it can skip the EOI MSR.
> 
> There's a new MSR to set the address of said register
> in guest memory. Otherwise not much changed:
> - Guest EOI is not required
> - Register is tested ISR is automatically cleared before injection
>  
> qemu support is incomplete - mostly for feature negotiation.
> need to add some trace points to enable profiling.
> No testing was done beyond compiling the kernel.
> 
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> 
> diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
> index b97596e..c9c70ea 100644
> --- a/arch/x86/include/asm/bitops.h
> +++ b/arch/x86/include/asm/bitops.h
> @@ -26,11 +26,13 @@
>  #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
>  /* Technically wrong, but this avoids compilation errors on some gcc
>     versions. */
> -#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
> +#define BITOP_ADDR_CONSTRAINT "=m"
>  #else
> -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> +#define BITOP_ADDR_CONSTRAINT "+m"
>  #endif
>  
> +#define BITOP_ADDR(x) BITOP_ADDR_CONSTRAINT (*(volatile long *) (x))
> +
>  #define ADDR				BITOP_ADDR(addr)
>  
>  /*
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index e216ba0..3d09ef1 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
>  		u64 length;
>  		u64 status;
>  	} osvw;
> +
> +	struct {
> +		u64 msr_val;
> +		struct gfn_to_hva_cache data;
> +		bool pending;
> +	} eoi;
>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> index 734c376..164376a 100644
> --- a/arch/x86/include/asm/kvm_para.h
> +++ b/arch/x86/include/asm/kvm_para.h
> @@ -22,6 +22,7 @@
>  #define KVM_FEATURE_CLOCKSOURCE2        3
>  #define KVM_FEATURE_ASYNC_PF		4
>  #define KVM_FEATURE_STEAL_TIME		5
> +#define KVM_FEATURE_EOI			6
>  
>  /* The last 8 bits are used to indicate how to interpret the flags field
>   * in pvclock structure. If no bits are set, all flags are ignored.
> @@ -37,6 +38,8 @@
>  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
>  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
>  #define MSR_KVM_STEAL_TIME  0x4b564d03
> +#define MSR_KVM_EOI_EN      0x4b564d04
> +#define MSR_KVM_EOI_DISABLED 0x0L
This is valid gpa. Follow others MSR example i.e align the address to,
lets say dword, and use lsb as enable bit. Other low bits are reserved
(#GP if set). And please move defines that not define new MSRs to some
other place.

>  
>  struct kvm_steal_time {
>  	__u64 steal;
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index b8ba6e4..450aae4 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -39,6 +39,7 @@
>  #include <asm/desc.h>
>  #include <asm/tlbflush.h>
>  #include <asm/idle.h>
> +#include <asm/apic.h>
>  
>  static int kvmapf = 1;
>  
> @@ -290,6 +291,33 @@ static void kvm_register_steal_time(void)
>  		cpu, __pa(st));
>  }
>  
> +/* TODO: needs to be early? aligned? */
> +static DEFINE_EARLY_PER_CPU(u8, apic_eoi, 0);
> +
> +/* Our own copy of __test_and_clear_bit to make sure
> + * it is done with a single instruction */
> +static inline int kvm_test_and_clear_bit(int nr, volatile u8* addr)
> +{
> +	int oldbit;
> +
> +	asm volatile("btr %2,%1\n\t"
> +		     "sbb %0,%0"
> +		     : "=r" (oldbit),
> +		     BITOP_ADDR_CONSTRAINT (*(volatile u8 *) (addr))
> +		     : "Ir" (nr));
> +	return oldbit;
> +}
> +
> +static void (*kvm_guest_native_apic_write)(u32 reg, u32 val);
> +static void kvm_guest_apic_write(u32 reg, u32 val)
> +{
> +	if (reg == APIC_EOI &&
> +	    kvm_test_and_clear_bit(0, &__get_cpu_var(apic_eoi)))
> +			return;
> +
> +	kvm_guest_native_apic_write(reg, val);
> +}
> +
>  void __cpuinit kvm_guest_cpu_init(void)
>  {
>  	if (!kvm_para_available())
> @@ -307,11 +335,18 @@ void __cpuinit kvm_guest_cpu_init(void)
>  		       smp_processor_id());
>  	}
>  
> +	
> +	if (kvm_para_has_feature(KVM_FEATURE_EOI)) {
> +		kvm_guest_native_apic_write = apic->write;
> +		apic->write = kvm_guest_apic_write;
> +		wrmsrl(MSR_KVM_EOI_EN, __pa(&__get_cpu_var(apic_eoi)));
> +	}
Can happen on more then one cpu. After it happens on two kvm_guest_apic_write()
will call to itself.

> +
>  	if (has_steal_clock)
>  		kvm_register_steal_time();
>  }
>  
> -static void kvm_pv_disable_apf(void *unused)
> +static void kvm_pv_disable_apf(void)
>  {
>  	if (!__get_cpu_var(apf_reason).enabled)
>  		return;
> @@ -323,11 +358,18 @@ static void kvm_pv_disable_apf(void *unused)
>  	       smp_processor_id());
>  }
>  
> +static void kvm_pv_guest_cpu_reboot(void *unused)
> +{
> +	if (kvm_para_has_feature(KVM_FEATURE_EOI))
> +		wrmsrl(MSR_KVM_EOI_EN, MSR_KVM_EOI_DISABLED);
Shouldn't it zero apic_eoi?

> +	kvm_pv_disable_apf();
> +}
> +
>  static int kvm_pv_reboot_notify(struct notifier_block *nb,
>  				unsigned long code, void *unused)
>  {
>  	if (code == SYS_RESTART)
> -		on_each_cpu(kvm_pv_disable_apf, NULL, 1);
> +		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
>  	return NOTIFY_DONE;
>  }
>  
> @@ -378,7 +420,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
>  static void kvm_guest_cpu_offline(void *dummy)
>  {
>  	kvm_disable_steal_time();
> -	kvm_pv_disable_apf(NULL);
> +	if (kvm_para_has_feature(KVM_FEATURE_EOI))
> +		wrmsrl(MSR_KVM_EOI_EN, MSR_KVM_EOI_DISABLED);
Same.

> +	kvm_pv_disable_apf();
>  	apf_task_wake_all();
>  }
>  
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 9fed5be..7d00d2d 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -408,6 +408,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
>  			     (1 << KVM_FEATURE_NOP_IO_DELAY) |
>  			     (1 << KVM_FEATURE_CLOCKSOURCE2) |
>  			     (1 << KVM_FEATURE_ASYNC_PF) |
> +			     (1 << KVM_FEATURE_EOI) |
>  			     (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
>  
>  		if (sched_info_on())
> diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
> index 7e06ba1..07bdfab 100644
> --- a/arch/x86/kvm/irq.c
> +++ b/arch/x86/kvm/irq.c
> @@ -48,7 +48,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
>  	if (!irqchip_in_kernel(v->kvm))
>  		return v->arch.interrupt.pending;
>  
> -	if (kvm_apic_has_interrupt(v) == -1) {	/* LAPIC */
> +	if (kvm_apic_has_interrupt(v) < 0) {	/* LAPIC */
>  		if (kvm_apic_accept_pic_intr(v)) {
>  			s = pic_irqchip(v->kvm);	/* PIC */
>  			return s->output;
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 992b4ea..c2118ab 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -270,6 +270,54 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
>  			irq->level, irq->trig_mode);
>  }
>  
> +static int eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
> +{
> +
> +	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val,
> +				      sizeof(val));
> +}
> +
> +static int eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
> +{
> +
> +	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val,
> +				      sizeof(*val));
> +}
> +
> +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
> +{
> +	return vcpu->arch.eoi.msr_val != MSR_KVM_EOI_DISABLED;
> +}
> +
> +static bool eoi_get_pending(struct kvm_vcpu *vcpu)
> +{
> +	u8 val;
> +	if (eoi_get_user(vcpu, &val) < 0)
> +		apic_debug("Can't read EOI MSR value: 0x%llx\n",
> +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> +	return val & 0x1;
> +}
> +
> +static void eoi_set_pending(struct kvm_vcpu *vcpu)
> +{
> +	if (eoi_put_user(vcpu, 0x1) < 0) {
> +		apic_debug("Can't set EOI MSR value: 0x%llx\n",
> +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> +		return;
> +	}
> +	vcpu->arch.eoi.pending = true;
> +}
> +
> +static void eoi_clr_pending(struct kvm_vcpu *vcpu)
> +{
> +	if (eoi_put_user(vcpu, 0x0) < 0) {
> +		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
> +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> +		return;
> +	}
> +	vcpu->arch.eoi.pending = false;
> +}
> +
>  static inline int apic_find_highest_isr(struct kvm_lapic *apic)
>  {
>  	int result;
> @@ -280,6 +328,20 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
>  	return result;
>  }
>  
> +static void apic_update_isr(struct kvm_lapic *apic)
> +{
> +	int vector;
> +	if (!eoi_enabled(apic->vcpu) ||
> +	    !apic->vcpu->arch.eoi.pending ||
> +	    eoi_get_pending(apic->vcpu))
> +		return;
> +	apic->vcpu->arch.eoi.pending = false;
> +	vector = apic_find_highest_isr(apic);
> +	if (vector == -1)
> +		return;
> +	apic_clear_vector(vector, apic->regs + APIC_ISR);
> +}
> +
We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending().
This removes the need for the function and its calls. We already have
call to kvm_lapic_sync_from_vapic() on exit path which should be
extended to do the above.

>  static void apic_update_ppr(struct kvm_lapic *apic)
>  {
>  	u32 tpr, isrv, ppr, old_ppr;
> @@ -287,6 +349,7 @@ static void apic_update_ppr(struct kvm_lapic *apic)
>  
>  	old_ppr = apic_get_reg(apic, APIC_PROCPRI);
>  	tpr = apic_get_reg(apic, APIC_TASKPRI);
> +	apic_update_isr(apic);
>  	isr = apic_find_highest_isr(apic);
>  	isrv = (isr != -1) ? isr : 0;
>  
> @@ -492,6 +555,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
>  	if (vector == -1)
>  		return;
>  
> +	if (eoi_enabled(apic->vcpu))
> +		eoi_clr_pending(apic->vcpu);
You have many of those. Just fold eoi_enabled() check into
eoi_clr_pending().

>  	apic_clear_vector(vector, apic->regs + APIC_ISR);
>  	apic_update_ppr(apic);
>  
> @@ -1085,6 +1150,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
>  	atomic_set(&apic->lapic_timer.pending, 0);
>  	if (kvm_vcpu_is_bsp(vcpu))
>  		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
> +	vcpu->arch.eoi.msr_val = MSR_KVM_EOI_DISABLED;
> +	vcpu->arch.eoi.pending = false;
>  	apic_update_ppr(apic);
>  
>  	vcpu->arch.apic_arb_prio = 0;
> @@ -1210,9 +1277,10 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
>  
>  	apic_update_ppr(apic);
>  	highest_irr = apic_find_highest_irr(apic);
> -	if ((highest_irr == -1) ||
> -	    ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
> +	if (highest_irr == -1)
>  		return -1;
> +	if (((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
> +		return -2;
>  	return highest_irr;
>  }
>  
> @@ -1244,9 +1312,20 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
>  	int vector = kvm_apic_has_interrupt(vcpu);
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  
> -	if (vector == -1)
> +	/* Detect interrupt nesting and disable EOI optimization */
> +	if (eoi_enabled(vcpu) && vector == -2)
> +		eoi_clr_pending(vcpu);
> +
> +	if (vector < 0)
>  		return -1;
>  
> +	if (eoi_enabled(vcpu)) {
> +		if (kvm_ioapic_handles_vector(vcpu->kvm, vector))
> +			eoi_clr_pending(vcpu);
> +		else
> +			eoi_set_pending(vcpu);
> +	}
> +
>  	apic_set_vector(vector, apic->regs + APIC_ISR);
>  	apic_update_ppr(apic);
>  	apic_clear_irr(vector, apic);
> @@ -1261,6 +1340,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
>  			     MSR_IA32_APICBASE_BASE;
>  	kvm_apic_set_version(vcpu);
>  
> +	if (eoi_enabled(apic->vcpu))
> +	    apic->vcpu->arch.eoi.pending = eoi_get_pending(apic->vcpu);
>  	apic_update_ppr(apic);
>  	hrtimer_cancel(&apic->lapic_timer.timer);
>  	update_divide_count(apic);
> @@ -1392,3 +1473,18 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
>  
>  	return 0;
>  }
> +
> +int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data)
> +{
> +	if (data == MSR_KVM_EOI_DISABLED) {
> +		struct kvm_lapic *apic = vcpu->arch.apic;
> +		if (apic && apic_enabled(apic))
> +			apic_update_isr(apic);
> +	} else if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data,
> +					     data))
> +		return 1;
> +
> +	vcpu->arch.eoi.msr_val = data;
> +	vcpu->arch.eoi.pending = false;
> +	return 0;
> +}
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 6f4ce25..042dace 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -60,4 +60,6 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
>  {
>  	return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
>  }
> +
> +int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data);
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4044ce0..31d6762 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1627,6 +1627,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
>  		if (kvm_pv_enable_async_pf(vcpu, data))
>  			return 1;
>  		break;
> +	case MSR_KVM_EOI_EN:
> +		if (kvm_pv_enable_apic_eoi(vcpu, data))
> +			return 1;
> +		break;
>  	case MSR_KVM_STEAL_TIME:
>  
>  		if (unlikely(!sched_info_on()))
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 10:08   ` Gleb Natapov
@ 2012-04-16 11:09     ` Michael S. Tsirkin
  2012-04-16 11:24       ` Gleb Natapov
  2012-04-17  9:22     ` Avi Kivity
  1 sibling, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-16 11:09 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

Thanks very much for the review. I'll address the comments.
Some questions on your comments below.

On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote:
> > @@ -37,6 +38,8 @@
> >  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> >  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> >  #define MSR_KVM_STEAL_TIME  0x4b564d03
> > +#define MSR_KVM_EOI_EN      0x4b564d04
> > +#define MSR_KVM_EOI_DISABLED 0x0L
> This is valid gpa. Follow others MSR example i.e align the address to,
> lets say dword, and use lsb as enable bit.

We only need a single byte, since this is per-CPU -
it's better to save the memory, so no alignment is required.
An explicit disable msr would also address this, right?

> > +static void apic_update_isr(struct kvm_lapic *apic)
> > +{
> > +	int vector;
> > +	if (!eoi_enabled(apic->vcpu) ||
> > +	    !apic->vcpu->arch.eoi.pending ||
> > +	    eoi_get_pending(apic->vcpu))
> > +		return;
> > +	apic->vcpu->arch.eoi.pending = false;
> > +	vector = apic_find_highest_isr(apic);
> > +	if (vector == -1)
> > +		return;
> > +	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > +}
> > +
> We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending().
> This removes the need for the function and its calls.

It's a bit of a waste: that one does all kind extra things
which we know we don't need, some of the atomics. And it's datapath
so extra stuff is not free.

Probably a good idea to replace the call on MSR disable - I think
apic_update_ppr is a better thing to call there.

Is there anything else that I missed?

> We already have
> call to kvm_lapic_sync_from_vapic() on exit path which should be
> extended to do the above.

It already does this. It calls apic_set_tpr
which calls apic_update_ppr which calls
apic_update_isr.

-- 
MST

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 11:09     ` Michael S. Tsirkin
@ 2012-04-16 11:24       ` Gleb Natapov
  2012-04-16 12:18         ` Michael S. Tsirkin
  0 siblings, 1 reply; 32+ messages in thread
From: Gleb Natapov @ 2012-04-16 11:24 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote:
> Thanks very much for the review. I'll address the comments.
> Some questions on your comments below.
> 
> On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote:
> > > @@ -37,6 +38,8 @@
> > >  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> > >  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> > >  #define MSR_KVM_STEAL_TIME  0x4b564d03
> > > +#define MSR_KVM_EOI_EN      0x4b564d04
> > > +#define MSR_KVM_EOI_DISABLED 0x0L
> > This is valid gpa. Follow others MSR example i.e align the address to,
> > lets say dword, and use lsb as enable bit.
> 
> We only need a single byte, since this is per-CPU -
> it's better to save the memory, so no alignment is required.
> An explicit disable msr would also address this, right?
> 
We do not have shortage of memory. Better make all MSRs works the same
way. BTW have you added new MSR to msrs_to_save array? I forgot to
checked.

> > > +static void apic_update_isr(struct kvm_lapic *apic)
> > > +{
> > > +	int vector;
> > > +	if (!eoi_enabled(apic->vcpu) ||
> > > +	    !apic->vcpu->arch.eoi.pending ||
> > > +	    eoi_get_pending(apic->vcpu))
> > > +		return;
> > > +	apic->vcpu->arch.eoi.pending = false;
> > > +	vector = apic_find_highest_isr(apic);
> > > +	if (vector == -1)
> > > +		return;
> > > +	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > > +}
> > > +
> > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending().
> > This removes the need for the function and its calls.
> 
> It's a bit of a waste: that one does all kind extra things
> which we know we don't need, some of the atomics. And it's datapath
> so extra stuff is not free.
> 
How much time those extra things are taking compared to vmexit you
already serving? And there is a good chance you will do them during
vmentry anyway while trying to inject (or just check for) new interrupt.

> Probably a good idea to replace the call on MSR disable - I think
> apic_update_ppr is a better thing to call there.
> 
> Is there anything else that I missed?
I think that simple things are better then complex things if the end result is
the same :) Try it and see how much simpler it is. Have you measured
that what you are trying to optimize actually worth optimizing? That you
can measure the optimization at all?

> 
> > We already have
> > call to kvm_lapic_sync_from_vapic() on exit path which should be
> > extended to do the above.
> 
> It already does this. It calls apic_set_tpr
> which calls apic_update_ppr which calls
> apic_update_isr.
> 
It does it only if vapic is in use (and it is usually not). But the if()
is already there so we do not need to worry that one additional if() on
the exit path will slow KVM to the crawl.

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 11:24       ` Gleb Natapov
@ 2012-04-16 12:18         ` Michael S. Tsirkin
  2012-04-16 12:30           ` Gleb Natapov
  2012-04-17  9:24           ` Avi Kivity
  0 siblings, 2 replies; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-16 12:18 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote:
> On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote:
> > Thanks very much for the review. I'll address the comments.
> > Some questions on your comments below.
> > 
> > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote:
> > > > @@ -37,6 +38,8 @@
> > > >  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> > > >  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> > > >  #define MSR_KVM_STEAL_TIME  0x4b564d03
> > > > +#define MSR_KVM_EOI_EN      0x4b564d04
> > > > +#define MSR_KVM_EOI_DISABLED 0x0L
> > > This is valid gpa. Follow others MSR example i.e align the address to,
> > > lets say dword, and use lsb as enable bit.
> > 
> > We only need a single byte, since this is per-CPU -
> > it's better to save the memory, so no alignment is required.
> > An explicit disable msr would also address this, right?
> > 
> We do not have shortage of memory.
> Better make all MSRs works the same
> way.

I agree it's nice to have EOI and ASYNC_PF look similar
but wasting memory is also bad.  I'll ponder this some more.

> BTW have you added new MSR to msrs_to_save array? I forgot to
> checked.

I didn't yet. Trying to understand how will that affect
cross-version migration - any input?

> > > > +static void apic_update_isr(struct kvm_lapic *apic)
> > > > +{
> > > > +	int vector;
> > > > +	if (!eoi_enabled(apic->vcpu) ||
> > > > +	    !apic->vcpu->arch.eoi.pending ||
> > > > +	    eoi_get_pending(apic->vcpu))
> > > > +		return;
> > > > +	apic->vcpu->arch.eoi.pending = false;
> > > > +	vector = apic_find_highest_isr(apic);
> > > > +	if (vector == -1)
> > > > +		return;
> > > > +	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > > > +}
> > > > +
> > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending().
> > > This removes the need for the function and its calls.
> > 
> > It's a bit of a waste: that one does all kind extra things
> > which we know we don't need, some of the atomics. And it's datapath
> > so extra stuff is not free.
> > 
> How much time those extra things are taking compared to vmexit you
> already serving? And there is a good chance you will do them during
> vmentry anyway while trying to inject (or just check for) new interrupt.

No need to do them twice :)

> > Probably a good idea to replace the call on MSR disable - I think
> > apic_update_ppr is a better thing to call there.
> > 
> > Is there anything else that I missed?
> I think that simple things are better then complex things if the end result is
> the same :) Try it and see how much simpler it is.

It doesn't seem to be simpler at all. The common functionality is
about 4 lines.

> Have you measured
> that what you are trying to optimize actually worth optimizing? That you
> can measure the optimization at all?

The claim is not that it's measureable. The claim is that
it does not scale to keep adding things to do on each entry.

> > 
> > > We already have
> > > call to kvm_lapic_sync_from_vapic() on exit path which should be
> > > extended to do the above.
> > 
> > It already does this. It calls apic_set_tpr
> > which calls apic_update_ppr which calls
> > apic_update_isr.
> > 
> It does it only if vapic is in use (and it is usually not).

When it's not we don't need to update ppr and so
no need to update isr on this exit.

> But the if()
> is already there so we do not need to worry that one additional if() on
> the exit path will slow KVM to the crawl.

The number of things we need to do on each entry keeps going up, if we
just keep adding stuff it won't end well.



> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 12:18         ` Michael S. Tsirkin
@ 2012-04-16 12:30           ` Gleb Natapov
  2012-04-16 13:13             ` Michael S. Tsirkin
  2012-04-17  9:24           ` Avi Kivity
  1 sibling, 1 reply; 32+ messages in thread
From: Gleb Natapov @ 2012-04-16 12:30 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On Mon, Apr 16, 2012 at 03:18:25PM +0300, Michael S. Tsirkin wrote:
> On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote:
> > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote:
> > > Thanks very much for the review. I'll address the comments.
> > > Some questions on your comments below.
> > > 
> > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote:
> > > > > @@ -37,6 +38,8 @@
> > > > >  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> > > > >  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> > > > >  #define MSR_KVM_STEAL_TIME  0x4b564d03
> > > > > +#define MSR_KVM_EOI_EN      0x4b564d04
> > > > > +#define MSR_KVM_EOI_DISABLED 0x0L
> > > > This is valid gpa. Follow others MSR example i.e align the address to,
> > > > lets say dword, and use lsb as enable bit.
> > > 
> > > We only need a single byte, since this is per-CPU -
> > > it's better to save the memory, so no alignment is required.
> > > An explicit disable msr would also address this, right?
> > > 
> > We do not have shortage of memory.
> > Better make all MSRs works the same
> > way.
> 
> I agree it's nice to have EOI and ASYNC_PF look similar
> but wasting memory is also bad.  I'll ponder this some more.
> 
Steal time and kvm clock too and may be others (if anything left at
all). I hope you are kidding about wasting of 4 bytes per vcpu.

> > BTW have you added new MSR to msrs_to_save array? I forgot to
> > checked.
> 
> I didn't yet. Trying to understand how will that affect
> cross-version migration - any input?
> 
Not sure. You need to check what userspace does with them.

> > > > > +static void apic_update_isr(struct kvm_lapic *apic)
> > > > > +{
> > > > > +	int vector;
> > > > > +	if (!eoi_enabled(apic->vcpu) ||
> > > > > +	    !apic->vcpu->arch.eoi.pending ||
> > > > > +	    eoi_get_pending(apic->vcpu))
> > > > > +		return;
> > > > > +	apic->vcpu->arch.eoi.pending = false;
> > > > > +	vector = apic_find_highest_isr(apic);
> > > > > +	if (vector == -1)
> > > > > +		return;
> > > > > +	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > > > > +}
> > > > > +
> > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending().
> > > > This removes the need for the function and its calls.
> > > 
> > > It's a bit of a waste: that one does all kind extra things
> > > which we know we don't need, some of the atomics. And it's datapath
> > > so extra stuff is not free.
> > > 
> > How much time those extra things are taking compared to vmexit you
> > already serving? And there is a good chance you will do them during
> > vmentry anyway while trying to inject (or just check for) new interrupt.
> 
> No need to do them twice :)
> 
> > > Probably a good idea to replace the call on MSR disable - I think
> > > apic_update_ppr is a better thing to call there.
> > > 
> > > Is there anything else that I missed?
> > I think that simple things are better then complex things if the end result is
> > the same :) Try it and see how much simpler it is.
> 
> It doesn't seem to be simpler at all. The common functionality is
> about 4 lines.
Send patch for us to see. lapic changes should be minimal.

> 
> > Have you measured
> > that what you are trying to optimize actually worth optimizing? That you
> > can measure the optimization at all?
> 
> The claim is not that it's measureable. The claim is that
> it does not scale to keep adding things to do on each entry.
> 
Only if there is something to do. "Premature optimization is the root of
all evil". The PV eoi is about not exiting on eoi unnecessary. You are
mixing this with trying to avoid calling eoi code for given interrupt at
all. Two different optimization, do not try lump them together.

> > > 
> > > > We already have
> > > > call to kvm_lapic_sync_from_vapic() on exit path which should be
> > > > extended to do the above.
> > > 
> > > It already does this. It calls apic_set_tpr
> > > which calls apic_update_ppr which calls
> > > apic_update_isr.
> > > 
> > It does it only if vapic is in use (and it is usually not).
> 
> When it's not we don't need to update ppr and so
> no need to update isr on this exit.
If there was eoi we need to update both.

> 
> > But the if()
> > is already there so we do not need to worry that one additional if() on
> > the exit path will slow KVM to the crawl.
> 
> The number of things we need to do on each entry keeps going up, if we
> just keep adding stuff it won't end well.
> 
You do not add stuff. The if() is already there.

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 12:30           ` Gleb Natapov
@ 2012-04-16 13:13             ` Michael S. Tsirkin
  2012-04-16 15:10               ` Gleb Natapov
  0 siblings, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-16 13:13 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On Mon, Apr 16, 2012 at 03:30:47PM +0300, Gleb Natapov wrote:
> On Mon, Apr 16, 2012 at 03:18:25PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote:
> > > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote:
> > > > Thanks very much for the review. I'll address the comments.
> > > > Some questions on your comments below.
> > > > 
> > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote:
> > > > > > @@ -37,6 +38,8 @@
> > > > > >  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> > > > > >  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> > > > > >  #define MSR_KVM_STEAL_TIME  0x4b564d03
> > > > > > +#define MSR_KVM_EOI_EN      0x4b564d04
> > > > > > +#define MSR_KVM_EOI_DISABLED 0x0L
> > > > > This is valid gpa. Follow others MSR example i.e align the address to,
> > > > > lets say dword, and use lsb as enable bit.
> > > > 
> > > > We only need a single byte, since this is per-CPU -
> > > > it's better to save the memory, so no alignment is required.
> > > > An explicit disable msr would also address this, right?
> > > > 
> > > We do not have shortage of memory.
> > > Better make all MSRs works the same
> > > way.
> > 
> > I agree it's nice to have EOI and ASYNC_PF look similar
> > but wasting memory is also bad.  I'll ponder this some more.
> > 
> Steal time and kvm clock too and may be others (if anything left at
> all). I hope you are kidding about wasting of 4 bytes per vcpu.

Not vcpu - cpu. It's wasted whenever kernel/kvm.c is built so it has
cost on physical machines as well.

> > > BTW have you added new MSR to msrs_to_save array? I forgot to
> > > checked.
> > 
> > I didn't yet. Trying to understand how will that affect
> > cross-version migration - any input?
> > 
> Not sure. You need to check what userspace does with them.
> 
> > > > > > +static void apic_update_isr(struct kvm_lapic *apic)
> > > > > > +{
> > > > > > +	int vector;
> > > > > > +	if (!eoi_enabled(apic->vcpu) ||
> > > > > > +	    !apic->vcpu->arch.eoi.pending ||
> > > > > > +	    eoi_get_pending(apic->vcpu))
> > > > > > +		return;
> > > > > > +	apic->vcpu->arch.eoi.pending = false;
> > > > > > +	vector = apic_find_highest_isr(apic);
> > > > > > +	if (vector == -1)
> > > > > > +		return;
> > > > > > +	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > > > > > +}
> > > > > > +
> > > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending().
> > > > > This removes the need for the function and its calls.
> > > > 
> > > > It's a bit of a waste: that one does all kind extra things
> > > > which we know we don't need, some of the atomics. And it's datapath
> > > > so extra stuff is not free.
> > > > 
> > > How much time those extra things are taking compared to vmexit you
> > > already serving? And there is a good chance you will do them during
> > > vmentry anyway while trying to inject (or just check for) new interrupt.
> > 
> > No need to do them twice :)
> > 
> > > > Probably a good idea to replace the call on MSR disable - I think
> > > > apic_update_ppr is a better thing to call there.
> > > > 
> > > > Is there anything else that I missed?
> > > I think that simple things are better then complex things if the end result is
> > > the same :) Try it and see how much simpler it is.
> > 
> > It doesn't seem to be simpler at all. The common functionality is
> > about 4 lines.
> Send patch for us to see.

That's what you are replying to, no?
You can see that it is 4 lines of code.

> lapic changes should be minimal.

Exactly my motivation.

> > 
> > > Have you measured
> > > that what you are trying to optimize actually worth optimizing? That you
> > > can measure the optimization at all?
> > 
> > The claim is not that it's measureable. The claim is that
> > it does not scale to keep adding things to do on each entry.
> > 
> Only if there is something to do. "Premature optimization is the root of
> all evil". The PV eoi is about not exiting on eoi unnecessary. You are
> mixing this with trying to avoid calling eoi code for given interrupt at
> all.

I don't think this is what my patch does. EOI still clears ISR
for each interrupt.

> Two different optimization, do not try lump them together.
> > > > 
> > > > > We already have
> > > > > call to kvm_lapic_sync_from_vapic() on exit path which should be
> > > > > extended to do the above.
> > > > 
> > > > It already does this. It calls apic_set_tpr
> > > > which calls apic_update_ppr which calls
> > > > apic_update_isr.
> > > > 
> > > It does it only if vapic is in use (and it is usually not).
> > 
> > When it's not we don't need to update ppr and so
> > no need to update isr on this exit.
> If there was eoi we need to update both.

By same logic we should call update_ppr on each entry.
The overhead is unlikely to be measureable either :).

> > 
> > > But the if()
> > > is already there so we do not need to worry that one additional if() on
> > > the exit path will slow KVM to the crawl.
> > 
> > The number of things we need to do on each entry keeps going up, if we
> > just keep adding stuff it won't end well.
> > 
> You do not add stuff. The if() is already there.


Your proposal was to check userspace eoi record
each time when eoi is pending, no?
This would certainly add some overhead.

I also find the logic easier to follow as is -
it is contained in lapic.c without relying
on being called from x86.c as just the right moment.

> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 13:13             ` Michael S. Tsirkin
@ 2012-04-16 15:10               ` Gleb Natapov
  2012-04-16 16:33                 ` Michael S. Tsirkin
  2012-04-16 17:24                 ` Michael S. Tsirkin
  0 siblings, 2 replies; 32+ messages in thread
From: Gleb Natapov @ 2012-04-16 15:10 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On Mon, Apr 16, 2012 at 04:13:29PM +0300, Michael S. Tsirkin wrote:
> On Mon, Apr 16, 2012 at 03:30:47PM +0300, Gleb Natapov wrote:
> > On Mon, Apr 16, 2012 at 03:18:25PM +0300, Michael S. Tsirkin wrote:
> > > On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote:
> > > > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote:
> > > > > Thanks very much for the review. I'll address the comments.
> > > > > Some questions on your comments below.
> > > > > 
> > > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote:
> > > > > > > @@ -37,6 +38,8 @@
> > > > > > >  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> > > > > > >  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> > > > > > >  #define MSR_KVM_STEAL_TIME  0x4b564d03
> > > > > > > +#define MSR_KVM_EOI_EN      0x4b564d04
> > > > > > > +#define MSR_KVM_EOI_DISABLED 0x0L
> > > > > > This is valid gpa. Follow others MSR example i.e align the address to,
> > > > > > lets say dword, and use lsb as enable bit.
> > > > > 
> > > > > We only need a single byte, since this is per-CPU -
> > > > > it's better to save the memory, so no alignment is required.
> > > > > An explicit disable msr would also address this, right?
> > > > > 
> > > > We do not have shortage of memory.
> > > > Better make all MSRs works the same
> > > > way.
> > > 
> > > I agree it's nice to have EOI and ASYNC_PF look similar
> > > but wasting memory is also bad.  I'll ponder this some more.
> > > 
> > Steal time and kvm clock too and may be others (if anything left at
> > all). I hope you are kidding about wasting of 4 bytes per vcpu.
> 
> Not vcpu - cpu. It's wasted whenever kernel/kvm.c is built so it has
> cost on physical machines as well.
> 
There are less real cpus than vcpus usually :)

> > > > BTW have you added new MSR to msrs_to_save array? I forgot to
> > > > checked.
> > > 
> > > I didn't yet. Trying to understand how will that affect
> > > cross-version migration - any input?
> > > 
> > Not sure. You need to check what userspace does with them.
> > 
> > > > > > > +static void apic_update_isr(struct kvm_lapic *apic)
> > > > > > > +{
> > > > > > > +	int vector;
> > > > > > > +	if (!eoi_enabled(apic->vcpu) ||
> > > > > > > +	    !apic->vcpu->arch.eoi.pending ||
> > > > > > > +	    eoi_get_pending(apic->vcpu))
> > > > > > > +		return;
> > > > > > > +	apic->vcpu->arch.eoi.pending = false;
> > > > > > > +	vector = apic_find_highest_isr(apic);
> > > > > > > +	if (vector == -1)
> > > > > > > +		return;
> > > > > > > +	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > > > > > > +}
> > > > > > > +
> > > > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending().
> > > > > > This removes the need for the function and its calls.
> > > > > 
> > > > > It's a bit of a waste: that one does all kind extra things
> > > > > which we know we don't need, some of the atomics. And it's datapath
> > > > > so extra stuff is not free.
> > > > > 
> > > > How much time those extra things are taking compared to vmexit you
> > > > already serving? And there is a good chance you will do them during
> > > > vmentry anyway while trying to inject (or just check for) new interrupt.
> > > 
> > > No need to do them twice :)
> > > 
> > > > > Probably a good idea to replace the call on MSR disable - I think
> > > > > apic_update_ppr is a better thing to call there.
> > > > > 
> > > > > Is there anything else that I missed?
> > > > I think that simple things are better then complex things if the end result is
> > > > the same :) Try it and see how much simpler it is.
> > > 
> > > It doesn't seem to be simpler at all. The common functionality is
> > > about 4 lines.
> > Send patch for us to see.
> 
> That's what you are replying to, no?
> You can see that it is 4 lines of code.
No. I mean something like patch below. Applies on top of yours. Did not
check that it works or even compiles.

> 
> > lapic changes should be minimal.
> 
> Exactly my motivation.
> 
My patch removes 13 lines more :)

> > > 
> > > > Have you measured
> > > > that what you are trying to optimize actually worth optimizing? That you
> > > > can measure the optimization at all?
> > > 
> > > The claim is not that it's measureable. The claim is that
> > > it does not scale to keep adding things to do on each entry.
> > > 
> > Only if there is something to do. "Premature optimization is the root of
> > all evil". The PV eoi is about not exiting on eoi unnecessary. You are
> > mixing this with trying to avoid calling eoi code for given interrupt at
> > all.
> 
> I don't think this is what my patch does. EOI still clears ISR
> for each interrupt.
> 
> > Two different optimization, do not try lump them together.
> > > > > 
> > > > > > We already have
> > > > > > call to kvm_lapic_sync_from_vapic() on exit path which should be
> > > > > > extended to do the above.
> > > > > 
> > > > > It already does this. It calls apic_set_tpr
> > > > > which calls apic_update_ppr which calls
> > > > > apic_update_isr.
> > > > > 
> > > > It does it only if vapic is in use (and it is usually not).
> > > 
> > > When it's not we don't need to update ppr and so
> > > no need to update isr on this exit.
> > If there was eoi we need to update both.
> 
> By same logic we should call update_ppr on each entry.
> The overhead is unlikely to be measureable either :).
> 
It is small enough for us to not care about it on RHEL6 where it is
called on each entry.

> > > 
> > > > But the if()
> > > > is already there so we do not need to worry that one additional if() on
> > > > the exit path will slow KVM to the crawl.
> > > 
> > > The number of things we need to do on each entry keeps going up, if we
> > > just keep adding stuff it won't end well.
> > > 
> > You do not add stuff. The if() is already there.
> 
> 
> Your proposal was to check userspace eoi record
> each time when eoi is pending, no?
Yes.

> This would certainly add some overhead.
> 
Only when eoi is pending. This is rare.

> I also find the logic easier to follow as is -
> it is contained in lapic.c without relying
> on being called from x86.c as just the right moment.
> 
See the patch. It change nothing outside of lapic.c.

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d184a41..8fb5eca 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -323,20 +323,6 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 	return result;
 }
 
-static void apic_update_isr(struct kvm_lapic *apic)
-{
-	int vector;
-	if (!eoi_enabled(apic->vcpu) ||
-	    !apic->vcpu->arch.eoi.pending ||
-	    eoi_get_pending(apic->vcpu))
-		return;
-	apic->vcpu->arch.eoi.pending = false;
-	vector = apic_find_highest_isr(apic);
-	if (vector == -1)
-		return;
-	apic_clear_vector(vector, apic->regs + APIC_ISR);
-}
-
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
 	u32 tpr, isrv, ppr, old_ppr;
@@ -344,7 +330,6 @@ static void apic_update_ppr(struct kvm_lapic *apic)
 
 	old_ppr = apic_get_reg(apic, APIC_PROCPRI);
 	tpr = apic_get_reg(apic, APIC_TASKPRI);
-	apic_update_isr(apic);
 	isr = apic_find_highest_isr(apic);
 	isrv = (isr != -1) ? isr : 0;
 
@@ -1361,14 +1346,19 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
 	u32 data;
 	void *vapic;
 
-	if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr)
+	if (!irqchip_in_kernel(vcpu->kvm) || !vcpu->arch.apic->vapic_addr ||
+			!apic->vcpu->arch.eoi.pending)
 		return;
 
-	vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
-	data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
-	kunmap_atomic(vapic);
+	if (apic->vcpu->arch.eoi.pending && !eoi_get_pending(apic->vcpu)) {
+		apic_set_eoi(apic);
+	} else {
+		vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
+		data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
+		kunmap_atomic(vapic);
 
-	apic_set_tpr(vcpu->arch.apic, data & 0xff);
+		apic_set_tpr(vcpu->arch.apic, data & 0xff);
+	}
 }
 
 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
@@ -1469,13 +1459,10 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
 
 int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data)
 {
-	if (data == MSR_KVM_EOI_DISABLED) {
-		struct kvm_lapic *apic = vcpu->arch.apic;
-		if (apic && apic_enabled(apic))
-			apic_update_isr(apic);
-	} else if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data,
-					     data))
-		return 1;
+	if (data != MSR_KVM_EOI_DISABLED)
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data,
+					data))
+			return 1;
 
 	vcpu->arch.eoi.msr_val = data;
 	vcpu->arch.eoi.pending = false;
--
			Gleb.

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 15:10               ` Gleb Natapov
@ 2012-04-16 16:33                 ` Michael S. Tsirkin
  2012-04-16 17:51                   ` Gleb Natapov
  2012-04-16 17:24                 ` Michael S. Tsirkin
  1 sibling, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-16 16:33 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote:
> On Mon, Apr 16, 2012 at 04:13:29PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Apr 16, 2012 at 03:30:47PM +0300, Gleb Natapov wrote:
> > > On Mon, Apr 16, 2012 at 03:18:25PM +0300, Michael S. Tsirkin wrote:
> > > > On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote:
> > > > > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote:
> > > > > > Thanks very much for the review. I'll address the comments.
> > > > > > Some questions on your comments below.
> > > > > > 
> > > > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote:
> > > > > > > > @@ -37,6 +38,8 @@
> > > > > > > >  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> > > > > > > >  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> > > > > > > >  #define MSR_KVM_STEAL_TIME  0x4b564d03
> > > > > > > > +#define MSR_KVM_EOI_EN      0x4b564d04
> > > > > > > > +#define MSR_KVM_EOI_DISABLED 0x0L
> > > > > > > This is valid gpa. Follow others MSR example i.e align the address to,
> > > > > > > lets say dword, and use lsb as enable bit.
> > > > > > 
> > > > > > We only need a single byte, since this is per-CPU -
> > > > > > it's better to save the memory, so no alignment is required.
> > > > > > An explicit disable msr would also address this, right?
> > > > > > 
> > > > > We do not have shortage of memory.
> > > > > Better make all MSRs works the same
> > > > > way.
> > > > 
> > > > I agree it's nice to have EOI and ASYNC_PF look similar
> > > > but wasting memory is also bad.  I'll ponder this some more.
> > > > 
> > > Steal time and kvm clock too and may be others (if anything left at
> > > all). I hope you are kidding about wasting of 4 bytes per vcpu.
> > 
> > Not vcpu - cpu. It's wasted whenever kernel/kvm.c is built so it has
> > cost on physical machines as well.
> > 
> There are less real cpus than vcpus usually :)

I'm adding this percpu always. This makes it cheap to
access but it means it is allocated on physical cpus -
just unused there.

> > > > > BTW have you added new MSR to msrs_to_save array? I forgot to
> > > > > checked.
> > > > 
> > > > I didn't yet. Trying to understand how will that affect
> > > > cross-version migration - any input?
> > > > 
> > > Not sure. You need to check what userspace does with them.
> > > 
> > > > > > > > +static void apic_update_isr(struct kvm_lapic *apic)
> > > > > > > > +{
> > > > > > > > +	int vector;
> > > > > > > > +	if (!eoi_enabled(apic->vcpu) ||
> > > > > > > > +	    !apic->vcpu->arch.eoi.pending ||
> > > > > > > > +	    eoi_get_pending(apic->vcpu))
> > > > > > > > +		return;
> > > > > > > > +	apic->vcpu->arch.eoi.pending = false;
> > > > > > > > +	vector = apic_find_highest_isr(apic);
> > > > > > > > +	if (vector == -1)
> > > > > > > > +		return;
> > > > > > > > +	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > > > > > > > +}
> > > > > > > > +
> > > > > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending().
> > > > > > > This removes the need for the function and its calls.
> > > > > > 
> > > > > > It's a bit of a waste: that one does all kind extra things
> > > > > > which we know we don't need, some of the atomics. And it's datapath
> > > > > > so extra stuff is not free.
> > > > > > 
> > > > > How much time those extra things are taking compared to vmexit you
> > > > > already serving? And there is a good chance you will do them during
> > > > > vmentry anyway while trying to inject (or just check for) new interrupt.
> > > > 
> > > > No need to do them twice :)
> > > > 
> > > > > > Probably a good idea to replace the call on MSR disable - I think
> > > > > > apic_update_ppr is a better thing to call there.
> > > > > > 
> > > > > > Is there anything else that I missed?
> > > > > I think that simple things are better then complex things if the end result is
> > > > > the same :) Try it and see how much simpler it is.
> > > > 
> > > > It doesn't seem to be simpler at all. The common functionality is
> > > > about 4 lines.
> > > Send patch for us to see.
> > 
> > That's what you are replying to, no?
> > You can see that it is 4 lines of code.
> No. I mean something like patch below. Applies on top of yours. Did not
> check that it works or even compiles.
> 
> > 
> > > lapic changes should be minimal.
> > 
> > Exactly my motivation.
> > 
> My patch removes 13 lines more :)

I'll take a look, thanks.

> > > > 
> > > > > Have you measured
> > > > > that what you are trying to optimize actually worth optimizing? That you
> > > > > can measure the optimization at all?
> > > > 
> > > > The claim is not that it's measureable. The claim is that
> > > > it does not scale to keep adding things to do on each entry.
> > > > 
> > > Only if there is something to do. "Premature optimization is the root of
> > > all evil". The PV eoi is about not exiting on eoi unnecessary. You are
> > > mixing this with trying to avoid calling eoi code for given interrupt at
> > > all.
> > 
> > I don't think this is what my patch does. EOI still clears ISR
> > for each interrupt.
> > 
> > > Two different optimization, do not try lump them together.
> > > > > > 
> > > > > > > We already have
> > > > > > > call to kvm_lapic_sync_from_vapic() on exit path which should be
> > > > > > > extended to do the above.
> > > > > > 
> > > > > > It already does this. It calls apic_set_tpr
> > > > > > which calls apic_update_ppr which calls
> > > > > > apic_update_isr.
> > > > > > 
> > > > > It does it only if vapic is in use (and it is usually not).
> > > > 
> > > > When it's not we don't need to update ppr and so
> > > > no need to update isr on this exit.
> > > If there was eoi we need to update both.
> > 
> > By same logic we should call update_ppr on each entry.
> > The overhead is unlikely to be measureable either :).
> > 
> It is small enough for us to not care about it on RHEL6 where it is
> called on each entry.

Exactly. So why do not we do it?

> > > > 
> > > > > But the if()
> > > > > is already there so we do not need to worry that one additional if() on
> > > > > the exit path will slow KVM to the crawl.
> > > > 
> > > > The number of things we need to do on each entry keeps going up, if we
> > > > just keep adding stuff it won't end well.
> > > > 
> > > You do not add stuff. The if() is already there.
> > 
> > 
> > Your proposal was to check userspace eoi record
> > each time when eoi is pending, no?
> Yes.
> 
> > This would certainly add some overhead.
> > 
> Only when eoi is pending. This is rare.

This is exactly while guest handles an interrupt.
It's not all that rare at all: e.g. device
drivers cause an exit from interrupt
handler by doing io.

> > I also find the logic easier to follow as is -
> > it is contained in lapic.c without relying
> > on being called from x86.c as just the right moment.
> > 
> See the patch. It change nothing outside of lapic.c.

I'll take a look, thanks.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 16:33                 ` Michael S. Tsirkin
@ 2012-04-16 17:51                   ` Gleb Natapov
  2012-04-16 19:01                     ` Michael S. Tsirkin
  0 siblings, 1 reply; 32+ messages in thread
From: Gleb Natapov @ 2012-04-16 17:51 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On Mon, Apr 16, 2012 at 07:33:28PM +0300, Michael S. Tsirkin wrote:
> On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote:
> > On Mon, Apr 16, 2012 at 04:13:29PM +0300, Michael S. Tsirkin wrote:
> > > On Mon, Apr 16, 2012 at 03:30:47PM +0300, Gleb Natapov wrote:
> > > > On Mon, Apr 16, 2012 at 03:18:25PM +0300, Michael S. Tsirkin wrote:
> > > > > On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote:
> > > > > > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote:
> > > > > > > Thanks very much for the review. I'll address the comments.
> > > > > > > Some questions on your comments below.
> > > > > > > 
> > > > > > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote:
> > > > > > > > > @@ -37,6 +38,8 @@
> > > > > > > > >  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> > > > > > > > >  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> > > > > > > > >  #define MSR_KVM_STEAL_TIME  0x4b564d03
> > > > > > > > > +#define MSR_KVM_EOI_EN      0x4b564d04
> > > > > > > > > +#define MSR_KVM_EOI_DISABLED 0x0L
> > > > > > > > This is valid gpa. Follow others MSR example i.e align the address to,
> > > > > > > > lets say dword, and use lsb as enable bit.
> > > > > > > 
> > > > > > > We only need a single byte, since this is per-CPU -
> > > > > > > it's better to save the memory, so no alignment is required.
> > > > > > > An explicit disable msr would also address this, right?
> > > > > > > 
> > > > > > We do not have shortage of memory.
> > > > > > Better make all MSRs works the same
> > > > > > way.
> > > > > 
> > > > > I agree it's nice to have EOI and ASYNC_PF look similar
> > > > > but wasting memory is also bad.  I'll ponder this some more.
> > > > > 
> > > > Steal time and kvm clock too and may be others (if anything left at
> > > > all). I hope you are kidding about wasting of 4 bytes per vcpu.
> > > 
> > > Not vcpu - cpu. It's wasted whenever kernel/kvm.c is built so it has
> > > cost on physical machines as well.
> > > 
> > There are less real cpus than vcpus usually :)
> 
> I'm adding this percpu always. This makes it cheap to
> access but it means it is allocated on physical cpus -
> just unused there.
> 
I got it. So suppose you have 1024 cpus, so if you'll use dword instead
of byte you will spend additional 3072 bytes (which you likely spend
anyway due to alignment that will be done to your u8). How much memory
do you expect to have with your 1024 cpus to care about 3072 bytes?
Are we seriously discussing it?

> > > > > > BTW have you added new MSR to msrs_to_save array? I forgot to
> > > > > > checked.
> > > > > 
> > > > > I didn't yet. Trying to understand how will that affect
> > > > > cross-version migration - any input?
> > > > > 
> > > > Not sure. You need to check what userspace does with them.
> > > > 
> > > > > > > > > +static void apic_update_isr(struct kvm_lapic *apic)
> > > > > > > > > +{
> > > > > > > > > +	int vector;
> > > > > > > > > +	if (!eoi_enabled(apic->vcpu) ||
> > > > > > > > > +	    !apic->vcpu->arch.eoi.pending ||
> > > > > > > > > +	    eoi_get_pending(apic->vcpu))
> > > > > > > > > +		return;
> > > > > > > > > +	apic->vcpu->arch.eoi.pending = false;
> > > > > > > > > +	vector = apic_find_highest_isr(apic);
> > > > > > > > > +	if (vector == -1)
> > > > > > > > > +		return;
> > > > > > > > > +	apic_clear_vector(vector, apic->regs + APIC_ISR);
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > We should just call apic_set_eoi() on exit if eoi.pending && !eoi_get_pending().
> > > > > > > > This removes the need for the function and its calls.
> > > > > > > 
> > > > > > > It's a bit of a waste: that one does all kind extra things
> > > > > > > which we know we don't need, some of the atomics. And it's datapath
> > > > > > > so extra stuff is not free.
> > > > > > > 
> > > > > > How much time those extra things are taking compared to vmexit you
> > > > > > already serving? And there is a good chance you will do them during
> > > > > > vmentry anyway while trying to inject (or just check for) new interrupt.
> > > > > 
> > > > > No need to do them twice :)
> > > > > 
> > > > > > > Probably a good idea to replace the call on MSR disable - I think
> > > > > > > apic_update_ppr is a better thing to call there.
> > > > > > > 
> > > > > > > Is there anything else that I missed?
> > > > > > I think that simple things are better then complex things if the end result is
> > > > > > the same :) Try it and see how much simpler it is.
> > > > > 
> > > > > It doesn't seem to be simpler at all. The common functionality is
> > > > > about 4 lines.
> > > > Send patch for us to see.
> > > 
> > > That's what you are replying to, no?
> > > You can see that it is 4 lines of code.
> > No. I mean something like patch below. Applies on top of yours. Did not
> > check that it works or even compiles.
> > 
> > > 
> > > > lapic changes should be minimal.
> > > 
> > > Exactly my motivation.
> > > 
> > My patch removes 13 lines more :)
> 
> I'll take a look, thanks.
> 
> > > > > 
> > > > > > Have you measured
> > > > > > that what you are trying to optimize actually worth optimizing? That you
> > > > > > can measure the optimization at all?
> > > > > 
> > > > > The claim is not that it's measureable. The claim is that
> > > > > it does not scale to keep adding things to do on each entry.
> > > > > 
> > > > Only if there is something to do. "Premature optimization is the root of
> > > > all evil". The PV eoi is about not exiting on eoi unnecessary. You are
> > > > mixing this with trying to avoid calling eoi code for given interrupt at
> > > > all.
> > > 
> > > I don't think this is what my patch does. EOI still clears ISR
> > > for each interrupt.
> > > 
> > > > Two different optimization, do not try lump them together.
> > > > > > > 
> > > > > > > > We already have
> > > > > > > > call to kvm_lapic_sync_from_vapic() on exit path which should be
> > > > > > > > extended to do the above.
> > > > > > > 
> > > > > > > It already does this. It calls apic_set_tpr
> > > > > > > which calls apic_update_ppr which calls
> > > > > > > apic_update_isr.
> > > > > > > 
> > > > > > It does it only if vapic is in use (and it is usually not).
> > > > > 
> > > > > When it's not we don't need to update ppr and so
> > > > > no need to update isr on this exit.
> > > > If there was eoi we need to update both.
> > > 
> > > By same logic we should call update_ppr on each entry.
> > > The overhead is unlikely to be measureable either :).
> > > 
> > It is small enough for us to not care about it on RHEL6 where it is
> > called on each entry.
> 
> Exactly. So why do not we do it?
> 
Calling it on each entry is a little bit excessive, calling it on each
interrupt it OK. But even when it is called on each entry it does not
create performance problems.

> > > > > 
> > > > > > But the if()
> > > > > > is already there so we do not need to worry that one additional if() on
> > > > > > the exit path will slow KVM to the crawl.
> > > > > 
> > > > > The number of things we need to do on each entry keeps going up, if we
> > > > > just keep adding stuff it won't end well.
> > > > > 
> > > > You do not add stuff. The if() is already there.
> > > 
> > > 
> > > Your proposal was to check userspace eoi record
> > > each time when eoi is pending, no?
> > Yes.
> > 
> > > This would certainly add some overhead.
> > > 
> > Only when eoi is pending. This is rare.
> 
> This is exactly while guest handles an interrupt.
> It's not all that rare at all: e.g. device
> drivers cause an exit from interrupt
> handler by doing io.
So eoi will be coalesced with io that device driver does. Exactly what
we want. But in great scheme of things interrupts are rare :) The
optimizations is disabled when interrupts are coming faster that they
are served anyway.

> 
> > > I also find the logic easier to follow as is -
> > > it is contained in lapic.c without relying
> > > on being called from x86.c as just the right moment.
> > > 
> > See the patch. It change nothing outside of lapic.c.
> 
> I'll take a look, thanks.

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 17:51                   ` Gleb Natapov
@ 2012-04-16 19:01                     ` Michael S. Tsirkin
  2012-04-17  8:45                       ` Gleb Natapov
  0 siblings, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-16 19:01 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On Mon, Apr 16, 2012 at 08:51:16PM +0300, Gleb Natapov wrote:
> > > Only when eoi is pending. This is rare.
> > 
> > This is exactly while guest handles an interrupt.
> > It's not all that rare at all: e.g. device
> > drivers cause an exit from interrupt
> > handler by doing io.
> So eoi will be coalesced with io that device driver does. Exactly what
> we want.

It won't. While we handle interrupts eoi is still set.
So there will be a couple of tests + read from userspace
Wasted not a huge overhead but it's the principle of the thing.

-- 
MST

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 19:01                     ` Michael S. Tsirkin
@ 2012-04-17  8:45                       ` Gleb Natapov
  0 siblings, 0 replies; 32+ messages in thread
From: Gleb Natapov @ 2012-04-17  8:45 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On Mon, Apr 16, 2012 at 10:01:29PM +0300, Michael S. Tsirkin wrote:
> On Mon, Apr 16, 2012 at 08:51:16PM +0300, Gleb Natapov wrote:
> > > > Only when eoi is pending. This is rare.
> > > 
> > > This is exactly while guest handles an interrupt.
> > > It's not all that rare at all: e.g. device
> > > drivers cause an exit from interrupt
> > > handler by doing io.
> > So eoi will be coalesced with io that device driver does. Exactly what
> > we want.
> 
> It won't. While we handle interrupts eoi is still set.
> So there will be a couple of tests + read from userspace
> Wasted not a huge overhead but it's the principle of the thing.
> 
Linux acks irq before calling device handler. Windows for some devices
does IO to a device before acking.

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 15:10               ` Gleb Natapov
  2012-04-16 16:33                 ` Michael S. Tsirkin
@ 2012-04-16 17:24                 ` Michael S. Tsirkin
  2012-04-16 17:37                   ` Gleb Natapov
  1 sibling, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-16 17:24 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote:
> > > lapic changes should be minimal.
> > 
> > Exactly my motivation.
> > 
> My patch removes 13 lines more :)

Haven't checked whether your patch is correct yet
but I see it's checking the eoi register on each exit.

I think it's clear this would make code a bit shorter
(not necessarily clearer as we are relying on ) but
as I said even though the extra overhead is likely
negligeable I have a feeling it's a wrong approach since
this won't scale as we add more features.

Let's see what do others think.

> > I also find the logic easier to follow as is -
> > it is contained in lapic.c without relying
> > on being called from x86.c as just the right moment.
> > 
> See the patch. It change nothing outside of lapic.c.

True but you rely on x86.c to call kvm_sync_lapic_from_vapic
at the right time before injecting interrupts.
I haven't checked whether that is always the case but
to me, this makes the code less clear and more fragile.

Again, it appears to be a matter of taste.

-- 
MST

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 17:24                 ` Michael S. Tsirkin
@ 2012-04-16 17:37                   ` Gleb Natapov
  2012-04-16 18:56                     ` Michael S. Tsirkin
  0 siblings, 1 reply; 32+ messages in thread
From: Gleb Natapov @ 2012-04-16 17:37 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On Mon, Apr 16, 2012 at 08:24:16PM +0300, Michael S. Tsirkin wrote:
> On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote:
> > > > lapic changes should be minimal.
> > > 
> > > Exactly my motivation.
> > > 
> > My patch removes 13 lines more :)
> 
> Haven't checked whether your patch is correct yet
> but I see it's checking the eoi register on each exit.
> 
Only if eoi.pending == true on exit.

> I think it's clear this would make code a bit shorter
> (not necessarily clearer as we are relying on ) but
> as I said even though the extra overhead is likely
> negligeable I have a feeling it's a wrong approach since
> this won't scale as we add more features.
> 
> Let's see what do others think.
> 
What I do not like about not calling eoi here is that we
have to reason about all the paths into apic and check that
we clear isr on all of them. And that is not all. eoi handler
set event request, is it OK to skip it? May be, or may be not.
 
> > > I also find the logic easier to follow as is -
> > > it is contained in lapic.c without relying
> > > on being called from x86.c as just the right moment.
> > > 
> > See the patch. It change nothing outside of lapic.c.
> 
> True but you rely on x86.c to call kvm_sync_lapic_from_vapic
> at the right time before injecting interrupts.
Not before injecting interrupts, but on vmexit.

> I haven't checked whether that is always the case but
> to me, this makes the code less clear and more fragile.
> 
We call a lot of apic functions from x86.c. That's were interrupt
injection happens.

> Again, it appears to be a matter of taste.
> 
> -- 
> MST

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 17:37                   ` Gleb Natapov
@ 2012-04-16 18:56                     ` Michael S. Tsirkin
  2012-04-17  8:59                       ` Gleb Natapov
  0 siblings, 1 reply; 32+ messages in thread
From: Michael S. Tsirkin @ 2012-04-16 18:56 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: kvm

On Mon, Apr 16, 2012 at 08:37:37PM +0300, Gleb Natapov wrote:
> On Mon, Apr 16, 2012 at 08:24:16PM +0300, Michael S. Tsirkin wrote:
> > On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote:
> > > > > lapic changes should be minimal.
> > > > 
> > > > Exactly my motivation.
> > > > 
> > > My patch removes 13 lines more :)
> > 
> > Haven't checked whether your patch is correct yet
> > but I see it's checking the eoi register on each exit.
> > 
> Only if eoi.pending == true on exit.

it's checking eoi.pending always and eoi register when
that is true.

> > I think it's clear this would make code a bit shorter
> > (not necessarily clearer as we are relying on ) but
> > as I said even though the extra overhead is likely
> > negligeable I have a feeling it's a wrong approach since
> > this won't scale as we add more features.
> > 
> > Let's see what do others think.
> > 
> What I do not like about not calling eoi here is that we
> have to reason about all the paths into apic and check that
> we clear isr on all of them.

Not at all. Instead, check each time before we read isr
or ppr.

> And that is not all. eoi handler
> set event request, is it OK to skip it? May be, or may be not.

I think it's for reinjection of lower prio interrupt.
Since eoi optimization is disabled in that case we don't need to set
event request.

> > > > I also find the logic easier to follow as is -
> > > > it is contained in lapic.c without relying
> > > > on being called from x86.c as just the right moment.
> > > > 
> > > See the patch. It change nothing outside of lapic.c.
> > 
> > True but you rely on x86.c to call kvm_sync_lapic_from_vapic
> > at the right time before injecting interrupts.
> Not before injecting interrupts, but on vmexit.

sync is called on entry, not on exit, no?


> > I haven't checked whether that is always the case but
> > to me, this makes the code less clear and more fragile.
> > 
> We call a lot of apic functions from x86.c. That's were interrupt
> injection happens.
> 
> > Again, it appears to be a matter of taste.
> > 
> > -- 
> > MST
> 
> --
> 			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 18:56                     ` Michael S. Tsirkin
@ 2012-04-17  8:59                       ` Gleb Natapov
  0 siblings, 0 replies; 32+ messages in thread
From: Gleb Natapov @ 2012-04-17  8:59 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm

On Mon, Apr 16, 2012 at 09:56:02PM +0300, Michael S. Tsirkin wrote:
> On Mon, Apr 16, 2012 at 08:37:37PM +0300, Gleb Natapov wrote:
> > On Mon, Apr 16, 2012 at 08:24:16PM +0300, Michael S. Tsirkin wrote:
> > > On Mon, Apr 16, 2012 at 06:10:11PM +0300, Gleb Natapov wrote:
> > > > > > lapic changes should be minimal.
> > > > > 
> > > > > Exactly my motivation.
> > > > > 
> > > > My patch removes 13 lines more :)
> > > 
> > > Haven't checked whether your patch is correct yet
> > > but I see it's checking the eoi register on each exit.
> > > 
> > Only if eoi.pending == true on exit.
> 
> it's checking eoi.pending always and eoi register when
> that is true.
> 
We already have if there, so we can reuse it. Instead of checking
vapic_addr in kvm_lapic_sync_from_vapic() let it check apic_attention
bitmask. vapic_addr and eoi.pending will set bit there and if() will
become if(!apic_attention) return. Zero overhead.

> > > I think it's clear this would make code a bit shorter
> > > (not necessarily clearer as we are relying on ) but
> > > as I said even though the extra overhead is likely
> > > negligeable I have a feeling it's a wrong approach since
> > > this won't scale as we add more features.
> > > 
> > > Let's see what do others think.
> > > 
> > What I do not like about not calling eoi here is that we
> > have to reason about all the paths into apic and check that
> > we clear isr on all of them.
> 
> Not at all. Instead, check each time before we read isr
> or ppr.
Or inject interrupt, or ... Luckily lapic code calls apic_update_ppr() a
lot (just in case), so adding check there likely covers any relevant
entry into the apic, but this is exactly reasoning I would like to avoid
if possible :) I do not see bug per se with your current scheme otherwise I
would point it out.

> 
> > And that is not all. eoi handler
> > set event request, is it OK to skip it? May be, or may be not.
> 
> I think it's for reinjection of lower prio interrupt.
Me too. But skipping setting that bit should not be taken lightly. We
need to know for sure. Current code tries to err on safe side and
prefers to set event request when not needed instead of missing it when
it is needed. Those rare cases are hard to debug.

> Since eoi optimization is disabled in that case we don't need to set
> event request.
> 
> > > > > I also find the logic easier to follow as is -
> > > > > it is contained in lapic.c without relying
> > > > > on being called from x86.c as just the right moment.
> > > > > 
> > > > See the patch. It change nothing outside of lapic.c.
> > > 
> > > True but you rely on x86.c to call kvm_sync_lapic_from_vapic
> > > at the right time before injecting interrupts.
> > Not before injecting interrupts, but on vmexit.
> 
> sync is called on entry, not on exit, no?
No. Look at the code. Entry is to late.

> 
> 
> > > I haven't checked whether that is always the case but
> > > to me, this makes the code less clear and more fragile.
> > > 
> > We call a lot of apic functions from x86.c. That's were interrupt
> > injection happens.
> > 
> > > Again, it appears to be a matter of taste.
> > > 
> > > -- 
> > > MST
> > 
> > --
> > 			Gleb.

--
			Gleb.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 12:18         ` Michael S. Tsirkin
  2012-04-16 12:30           ` Gleb Natapov
@ 2012-04-17  9:24           ` Avi Kivity
  1 sibling, 0 replies; 32+ messages in thread
From: Avi Kivity @ 2012-04-17  9:24 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Gleb Natapov, kvm

On 04/16/2012 03:18 PM, Michael S. Tsirkin wrote:
> On Mon, Apr 16, 2012 at 02:24:46PM +0300, Gleb Natapov wrote:
> > On Mon, Apr 16, 2012 at 02:09:20PM +0300, Michael S. Tsirkin wrote:
> > > Thanks very much for the review. I'll address the comments.
> > > Some questions on your comments below.
> > > 
> > > On Mon, Apr 16, 2012 at 01:08:07PM +0300, Gleb Natapov wrote:
> > > > > @@ -37,6 +38,8 @@
> > > > >  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> > > > >  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> > > > >  #define MSR_KVM_STEAL_TIME  0x4b564d03
> > > > > +#define MSR_KVM_EOI_EN      0x4b564d04
> > > > > +#define MSR_KVM_EOI_DISABLED 0x0L
> > > > This is valid gpa. Follow others MSR example i.e align the address to,
> > > > lets say dword, and use lsb as enable bit.
> > > 
> > > We only need a single byte, since this is per-CPU -
> > > it's better to save the memory, so no alignment is required.
> > > An explicit disable msr would also address this, right?
> > > 
> > We do not have shortage of memory.
> > Better make all MSRs works the same
> > way.
>
> I agree it's nice to have EOI and ASYNC_PF look similar
> but wasting memory is also bad.  I'll ponder this some more.

Wasting three bytes?

> > BTW have you added new MSR to msrs_to_save array? I forgot to
> > checked.
>
> I didn't yet. Trying to understand how will that affect
> cross-version migration - any input?

It will just work.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCHv1 dont apply] RFC: kvm eoi PV using shared memory
  2012-04-16 10:08   ` Gleb Natapov
  2012-04-16 11:09     ` Michael S. Tsirkin
@ 2012-04-17  9:22     ` Avi Kivity
  1 sibling, 0 replies; 32+ messages in thread
From: Avi Kivity @ 2012-04-17  9:22 UTC (permalink / raw)
  To: Gleb Natapov; +Cc: Michael S. Tsirkin, kvm

On 04/16/2012 01:08 PM, Gleb Natapov wrote:
> >  	}
> >  
> > +	
> > +	if (kvm_para_has_feature(KVM_FEATURE_EOI)) {
> > +		kvm_guest_native_apic_write = apic->write;
> > +		apic->write = kvm_guest_apic_write;
> > +		wrmsrl(MSR_KVM_EOI_EN, __pa(&__get_cpu_var(apic_eoi)));
> > +	}
> Can happen on more then one cpu. After it happens on two kvm_guest_apic_write()
> will call to itself.

It's also hacky.  Try static_cpu_has() in the apic code (and add an eoi
method).


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2012-04-17  9:24 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-04-10 13:27 [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory Michael S. Tsirkin
2012-04-10 14:03 ` Avi Kivity
2012-04-10 14:26   ` Michael S. Tsirkin
2012-04-10 14:33     ` Avi Kivity
2012-04-10 14:53       ` Michael S. Tsirkin
2012-04-10 15:00         ` Avi Kivity
2012-04-10 15:14           ` Michael S. Tsirkin
2012-04-10 16:08             ` Avi Kivity
2012-04-10 17:06               ` Michael S. Tsirkin
2012-04-10 17:59     ` Gleb Natapov
2012-04-10 19:30       ` Michael S. Tsirkin
2012-04-10 19:33         ` Gleb Natapov
2012-04-10 19:40           ` Michael S. Tsirkin
2012-04-10 19:42             ` Gleb Natapov
2012-04-15 16:18 ` [PATCHv1 " Michael S. Tsirkin
2012-04-16 10:08   ` Gleb Natapov
2012-04-16 11:09     ` Michael S. Tsirkin
2012-04-16 11:24       ` Gleb Natapov
2012-04-16 12:18         ` Michael S. Tsirkin
2012-04-16 12:30           ` Gleb Natapov
2012-04-16 13:13             ` Michael S. Tsirkin
2012-04-16 15:10               ` Gleb Natapov
2012-04-16 16:33                 ` Michael S. Tsirkin
2012-04-16 17:51                   ` Gleb Natapov
2012-04-16 19:01                     ` Michael S. Tsirkin
2012-04-17  8:45                       ` Gleb Natapov
2012-04-16 17:24                 ` Michael S. Tsirkin
2012-04-16 17:37                   ` Gleb Natapov
2012-04-16 18:56                     ` Michael S. Tsirkin
2012-04-17  8:59                       ` Gleb Natapov
2012-04-17  9:24           ` Avi Kivity
2012-04-17  9:22     ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox