All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: kvm@vger.kernel.org
Subject: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
Date: Tue, 10 Apr 2012 16:27:57 +0300	[thread overview]
Message-ID: <20120410132756.GA14101@redhat.com> (raw)

I took a stab at implementing PV EOI using shared memory.
This should reduce the number of exits an interrupt
causes by as much as half.

A partially complete draft for both host and guest parts
is below.

The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
We set it before injecting an interrupt and clear it
before injecting a nested one. The guest tests it using
a test-and-clear operation - this is necessary
so that the host can detect interrupt nesting -
and if the bit was set, the guest can skip the EOI MSR write.

There's a new MSR to set the address of said register
in guest memory. Otherwise not much changes:
- Guest EOI is not required
- ISR is automatically cleared before injection

Some things are incomplete: feature negotiation options and
qemu support for said options still need to be added.
No testing was done beyond compiling the kernel.

I would appreciate early feedback.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

--

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index d854101..8430f41 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
+
 static inline void ack_APIC_irq(void)
 {
+	if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi)))
+		return;
+
 	/*
 	 * ack_APIC_irq() actually gets compiled as a single instruction
 	 * ... yummie.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e216ba0..0ee1472 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
 		u64 length;
 		u64 status;
 	} osvw;
+
+	struct {
+		u64 msr_val;
+		struct gfn_to_hva_cache data;
+		int vector;
+	} eoi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 734c376..e22b9f8 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -37,6 +37,8 @@
 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 #define MSR_KVM_STEAL_TIME  0x4b564d03
+#define MSR_KVM_EOI_EN      0x4b564d04
+#define MSR_KVM_EOI_ENABLED 0x1
 
 struct kvm_steal_time {
 	__u64 steal;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 11544d8..1b3f9fa 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -89,6 +89,9 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
  */
 DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID);
 
+DEFINE_EARLY_PER_CPU(unsigned long, apic_eoi, 0);
+EXPORT_EARLY_PER_CPU_SYMBOL(apic_eoi);
+
 /*
  * Knob to control our willingness to enable the local APIC.
  *
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b8ba6e4..8b50f3a 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -39,6 +39,7 @@
 #include <asm/desc.h>
 #include <asm/tlbflush.h>
 #include <asm/idle.h>
+#include <asm/apic.h>
 
 static int kvmapf = 1;
 
@@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
 		       smp_processor_id());
 	}
 
+	wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
+	       MSR_KVM_EOI_ENABLED);
+
 	if (has_steal_clock)
 		kvm_register_steal_time();
 }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 8584322..9e38e12 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
 			irq->level, irq->trig_mode);
 }
 
-static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
+{
+
+	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val,
+				      sizeof(val));
+}
+
+static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
+{
+
+	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val,
+				      sizeof(*val));
+}
+
+static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED);
+}
+
+static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
+{
+	u32 val;
+	if (eoi_get_user(vcpu, &val) < 0)
+		apic_debug("Can't read EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpi->arch.eoi.msr_val);
+	if (!(val & 0x1))
+		vcpu->arch.eoi.vector = -1;
+	return vcpu->arch.eoi.vector;
+}
+
+static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
+{
+	BUG_ON(vcpu->arch.eoi.vector != -1);
+	if (eoi_put_user(vcpu, 0x1) < 0) {
+		apic_debug("Can't set EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpi->arch.eoi.msr_val);
+		return;
+	}
+	vcpu->arch.eoi.vector = vector;
+}
+
+static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
+{
+	int vector;
+	vector = vcpu->arch.eoi.vector;
+	if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) {
+		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+			   (unsigned long long)vcpi->arch.eoi.msr_val);
+		return -1;
+	}
+	vcpu->arch.eoi.vector = -1;
+	return vector;
+}
+
+static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
 {
 	int result;
 
@@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
 	return result;
 }
 
+static inline int apic_find_highest_isr(struct kvm_lapic *apic)
+{
+	int vector;
+	if (eoi_enabled(apic->vcpu)) {
+		vector = eoi_get_pending_vector(apic->vcpu);
+		if (vector != -1)
+			return vector;
+	}
+	return __apic_find_highest_isr(apic);
+}
+
 static void apic_update_ppr(struct kvm_lapic *apic)
 {
 	u32 tpr, isrv, ppr, old_ppr;
@@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
 	if (vector == -1)
 		return;
 
+	if (eoi_enabled(apic->vcpu))
+		eoi_clr_pending_vector(apic->vcpu);
 	apic_clear_vector(vector, apic->regs + APIC_ISR);
 	apic_update_ppr(apic);
 
@@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 {
 	int vector = kvm_apic_has_interrupt(vcpu);
 	struct kvm_lapic *apic = vcpu->arch.apic;
+	bool set_isr = true;
 
 	if (vector == -1)
 		return -1;
 
-	apic_set_vector(vector, apic->regs + APIC_ISR);
+	if (eoi_enabled(vcpu)) {
+		/* Anything pending? If yes disable eoi optimization. */
+		if (unlikely(apic_find_highest_isr(apic) >= 0)) {
+			int v = eoi_clr_pending_vector(vcpu);
+			if (v != -1)
+				apic_set_vector(v, apic->regs + APIC_ISR);
+		} else {
+			eoi_set_pending_vector(vcpu, vector);
+			set_isr = false;
+		}
+	}
+
+	if (set_isr)
+		apic_set_vector(vector, apic->regs + APIC_ISR);
 	apic_update_ppr(apic);
 	apic_clear_irr(vector, apic);
 	return vector;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4044ce0..4d00a4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1502,6 +1502,27 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 	return 0;
 }
 
+static int kvm_pv_enable_apic_eoi(struct kvm_vcpu *vcpu, u64 data)
+{
+	gpa_t gpa = data & ~MSR_KVM_EOI_ENABLED;
+
+	/* Bit 1 is reserved, Should be zero. */
+	if (data & 0x2)
+		return 1;
+
+	vcpu->arch.eoi.msr_val = data;
+	vcpu->arch.eoi.vector = -1;
+
+	if (!(data & MSR_KVM_EOI_ENABLED)) {
+		return 0;
+	}
+
+	if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.eoi.data, gpa))
+		return 1;
+
+	return 0;
+}
+
 static void kvmclock_reset(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.time_page) {
@@ -1627,6 +1648,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		if (kvm_pv_enable_async_pf(vcpu, data))
 			return 1;
 		break;
+	case MSR_KVM_EOI_EN:
+		if (kvm_pv_enable_apic_eoi(vcpu, data))
+			return 1;
+		break;
 	case MSR_KVM_STEAL_TIME:
 
 		if (unlikely(!sched_info_on()))
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 638a4f3..05c1bf9 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -372,7 +372,6 @@ static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
 {
 	const int base = 'z' - 'a' + 1;
 	char *begin = buf + strlen(prefix);
-	char *begin = buf + strlen(prefix);
 	char *end = buf + buflen;
 	char *p;
 	int unit;
diff --git a/net/core/dev.c b/net/core/dev.c
index 9d713b8..e42529b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2455,6 +2455,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
+		/* Orphan the skb - required if we might hang on to it
+		 * for indefinite time. */
+		if (unlikely(dev->priv_flags & IFF_TX_CAN_STALL))
+			skb_orphan(skb);
+
 		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
 		if (qdisc_run_begin(q)) {
 			if (unlikely(contended)) {
@@ -2517,11 +2522,6 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
-	/* Orphan the skb - required if we might hang on to it
-	 * for indefinite time. */
-	if (dev->priv_flags & IFF_TX_CAN_STALL)
-		skb_orphan(skb);
-
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 27883d1..644ca53 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -120,14 +120,11 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	/* And release qdisc */
 	spin_unlock(root_lock);
 
-	/* Orphan the skb - required if we might hang on to it
-	 * for indefinite time. */
-	if (dev->priv_flags & IFF_TX_CAN_STALL)
-		skb_orphan(skb);
-
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_xmit_frozen_or_stopped(txq))
+	if (likely(!netif_xmit_frozen_or_stopped(txq)))
 		ret = dev_hard_start_xmit(skb, dev, txq);
+	else if (dev->priv_flags & IFF_TX_CAN_STALL)
+		skb_orphan(skb);
 
 	HARD_TX_UNLOCK(dev, txq);
 
@@ -695,7 +692,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
 {
 	struct Qdisc *qdisc = &noqueue_qdisc;
 
-	if (dev->tx_queue_len) {
+	if (dev->tx_queue_len && !(dev->priv_flags & IFF_TX_CAN_STALL)) {
 		qdisc = qdisc_create_dflt(dev_queue,
 					  &pfifo_fast_ops, TC_H_ROOT);
 		if (!qdisc) {

             reply	other threads:[~2012-04-10 13:27 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-10 13:27 Michael S. Tsirkin [this message]
2012-04-10 14:03 ` [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory Avi Kivity
2012-04-10 14:26   ` Michael S. Tsirkin
2012-04-10 14:33     ` Avi Kivity
2012-04-10 14:53       ` Michael S. Tsirkin
2012-04-10 15:00         ` Avi Kivity
2012-04-10 15:14           ` Michael S. Tsirkin
2012-04-10 16:08             ` Avi Kivity
2012-04-10 17:06               ` Michael S. Tsirkin
2012-04-10 17:59     ` Gleb Natapov
2012-04-10 19:30       ` Michael S. Tsirkin
2012-04-10 19:33         ` Gleb Natapov
2012-04-10 19:40           ` Michael S. Tsirkin
2012-04-10 19:42             ` Gleb Natapov
2012-04-15 16:18 ` [PATCHv1 " Michael S. Tsirkin
2012-04-16 10:08   ` Gleb Natapov
2012-04-16 11:09     ` Michael S. Tsirkin
2012-04-16 11:24       ` Gleb Natapov
2012-04-16 12:18         ` Michael S. Tsirkin
2012-04-16 12:30           ` Gleb Natapov
2012-04-16 13:13             ` Michael S. Tsirkin
2012-04-16 15:10               ` Gleb Natapov
2012-04-16 16:33                 ` Michael S. Tsirkin
2012-04-16 17:51                   ` Gleb Natapov
2012-04-16 19:01                     ` Michael S. Tsirkin
2012-04-17  8:45                       ` Gleb Natapov
2012-04-16 17:24                 ` Michael S. Tsirkin
2012-04-16 17:37                   ` Gleb Natapov
2012-04-16 18:56                     ` Michael S. Tsirkin
2012-04-17  8:59                       ` Gleb Natapov
2012-04-17  9:24           ` Avi Kivity
2012-04-17  9:22     ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120410132756.GA14101@redhat.com \
    --to=mst@redhat.com \
    --cc=kvm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.