All of lore.kernel.org
 help / color / mirror / Atom feed
From: Avi Kivity <avi@redhat.com>
To: "Michael S. Tsirkin" <mst@redhat.com>
Cc: kvm@vger.kernel.org
Subject: Re: [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory
Date: Tue, 10 Apr 2012 17:03:22 +0300	[thread overview]
Message-ID: <4F843DAA.1000808@redhat.com> (raw)
In-Reply-To: <20120410132756.GA14101@redhat.com>

On 04/10/2012 04:27 PM, Michael S. Tsirkin wrote:
> I took a stub at implementing PV EOI using shared memory.
> This should reduce the number of exits an interrupt
> causes as much as by half.
>
> A partially complete draft for both host and guest parts
> is below.
>
> The idea is simple: there's a bit, per APIC, in guest memory,
> that tells the guest that it does not need EOI.
> We set it before injecting an interrupt and clear
> before injecting a nested one. Guest tests it using
> a test and clear operation - this is necessary
> so that host can detect interrupt nesting -
> and if set, it can skip the EOI MSR.
>
> There's a new MSR to set the address of said register
> in guest memory. Otherwise not much changes:
> - Guest EOI is not required
> - ISR is automatically cleared before injection
>
> Some things are incomplete: add feature negotiation
> options, qemu support for said options.
> No testing was done beyond compiling the kernel.
>
> I would appreciate early feedback.
>
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>
> --
>
> diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
> index d854101..8430f41 100644
> --- a/arch/x86/include/asm/apic.h
> +++ b/arch/x86/include/asm/apic.h
> @@ -457,8 +457,13 @@ static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
>  
>  #endif /* CONFIG_X86_LOCAL_APIC */
>  
> +DECLARE_EARLY_PER_CPU(unsigned long, apic_eoi);
> +
>  static inline void ack_APIC_irq(void)
>  {
> +	if (__test_and_clear_bit(0, &__get_cpu_var(apic_eoi)))
> +		return;
> +

While __test_and_clear_bit() is implemented in a single instruction,
it's not required to be.  Better have the instruction there explicitly.

>  	/*
>  	 * ack_APIC_irq() actually gets compiled as a single instruction
>  	 * ... yummie.
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index e216ba0..0ee1472 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -481,6 +481,12 @@ struct kvm_vcpu_arch {
>  		u64 length;
>  		u64 status;
>  	} osvw;
> +
> +	struct {
> +		u64 msr_val;
> +		struct gfn_to_hva_cache data;
> +		int vector;
> +	} eoi;
>  };

Needs to be cleared on INIT.

>  
>
> @@ -307,6 +308,9 @@ void __cpuinit kvm_guest_cpu_init(void)
>  		       smp_processor_id());
>  	}
>  
> +	wrmsrl(MSR_KVM_EOI_EN, __pa(this_cpu_ptr(apic_eoi)) |
> +	       MSR_KVM_EOI_ENABLED);
> +

Clear on kexec.

>  	if (has_steal_clock)
>  		kvm_register_steal_time();
>  }
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 8584322..9e38e12 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -265,7 +265,61 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
>  			irq->level, irq->trig_mode);
>  }
>  
> -static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> +static int eoi_put_user(struct kvm_vcpu *vcpu, u32 val)
> +{
> +
> +	return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, &val,
> +				      sizeof(val));
> +}
> +
> +static int eoi_get_user(struct kvm_vcpu *vcpu, u32 *val)
> +{
> +
> +	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.eoi.data, val,
> +				      sizeof(*val));
> +}
> +
> +static inline bool eoi_enabled(struct kvm_vcpu *vcpu)
> +{
> +	return (vcpu->arch.eoi.msr_val & MSR_KVM_EOI_ENABLED);
> +}
> +
> +static int eoi_get_pending_vector(struct kvm_vcpu *vcpu)
> +{
> +	u32 val;
> +	if (eoi_get_user(vcpu, &val) < 0)
> +		apic_debug("Can't read EOI MSR value: 0x%llx\n",
> +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> +	if (!(val & 0x1))
> +		vcpu->arch.eoi.vector = -1;
> +	return vcpu->arch.eoi.vector;
> +}
> +
> +static void eoi_set_pending_vector(struct kvm_vcpu *vcpu, int vector)
> +{
> +	BUG_ON(vcpu->arch.eoi.vector != -1);
> +	if (eoi_put_user(vcpu, 0x1) < 0) {
> +		apic_debug("Can't set EOI MSR value: 0x%llx\n",
> +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> +		return;
> +	}
> +	vcpu->arch.eoi.vector = vector;
> +}
> +
> +static int eoi_clr_pending_vector(struct kvm_vcpu *vcpu)
> +{
> +	int vector;
> +	vector = vcpu->arch.eoi.vector;
> +	if (vector != -1 && eoi_put_user(vcpu, 0x0) < 0) {
> +		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
> +			   (unsigned long long)vcpi->arch.eoi.msr_val);
> +		return -1;
> +	}
> +	vcpu->arch.eoi.vector = -1;
> +	return vector;
> +}



> +
> +static inline int __apic_find_highest_isr(struct kvm_lapic *apic)
>  {
>  	int result;
>  
> @@ -275,6 +329,17 @@ static inline int apic_find_highest_isr(struct kvm_lapic *apic)
>  	return result;
>  }
>  
> +static inline int apic_find_highest_isr(struct kvm_lapic *apic)
> +{
> +	int vector;
> +	if (eoi_enabled(apic->vcpu)) {
> +		vector = eoi_get_pending_vector(apic->vcpu);
> +		if (vector != -1)
> +			return vector;
> +	}
> +	return __apic_find_highest_isr(apic);
> +}

Why aren't you modifying the ISR unconfitionally?

> +
>  static void apic_update_ppr(struct kvm_lapic *apic)
>  {
>  	u32 tpr, isrv, ppr, old_ppr;
> @@ -488,6 +553,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
>  	if (vector == -1)
>  		return;
>  
> +	if (eoi_enabled(apic->vcpu))
> +		eoi_clr_pending_vector(apic->vcpu);
>  	apic_clear_vector(vector, apic->regs + APIC_ISR);
>  	apic_update_ppr(apic);
>  
> @@ -1236,11 +1303,25 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
>  {
>  	int vector = kvm_apic_has_interrupt(vcpu);
>  	struct kvm_lapic *apic = vcpu->arch.apic;
> +	bool set_isr = true;
>  
>  	if (vector == -1)
>  		return -1;
>  
> -	apic_set_vector(vector, apic->regs + APIC_ISR);
> +	if (eoi_enabled(vcpu)) {
> +		/* Anything pending? If yes disable eoi optimization. */
> +		if (unlikely(apic_find_highest_isr(apic) >= 0)) {
> +			int v = eoi_clr_pending_vector(vcpu);

ISR != pending, that's IRR.  If ISR vector has lower priority than the
new vector, then we don't need to disable eoi avoidance.

> +			if (v != -1)
> +				apic_set_vector(v, apic->regs + APIC_ISR);
> +		} else {
> +			eoi_set_pending_vector(vcpu, vector);
> +			set_isr = false;

Weird.  Just set it normally.  Remember that reading the ISR needs to
return the correct value.

We need to process the avoided EOI before any APIC read/writes, to be
sure the guest sees the updated values.  Same for IOAPIC, EOI affects
remote_irr.  That may been we need to sample it after every exit, or
perhaps disable the feature for level-triggered interrupts.


> +		}
> +	}
> +
> +	if (set_isr)
> +		apic_set_vector(vector, apic->regs + APIC_ISR);
>  	apic_update_ppr(apic);
>  	apic_clear_irr(vector, apic);
>  	return vector;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>

-- 
error compiling committee.c: too many arguments to function


  reply	other threads:[~2012-04-10 14:03 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-04-10 13:27 [PATCHv0 dont apply] RFC: kvm eoi PV using shared memory Michael S. Tsirkin
2012-04-10 14:03 ` Avi Kivity [this message]
2012-04-10 14:26   ` Michael S. Tsirkin
2012-04-10 14:33     ` Avi Kivity
2012-04-10 14:53       ` Michael S. Tsirkin
2012-04-10 15:00         ` Avi Kivity
2012-04-10 15:14           ` Michael S. Tsirkin
2012-04-10 16:08             ` Avi Kivity
2012-04-10 17:06               ` Michael S. Tsirkin
2012-04-10 17:59     ` Gleb Natapov
2012-04-10 19:30       ` Michael S. Tsirkin
2012-04-10 19:33         ` Gleb Natapov
2012-04-10 19:40           ` Michael S. Tsirkin
2012-04-10 19:42             ` Gleb Natapov
2012-04-15 16:18 ` [PATCHv1 " Michael S. Tsirkin
2012-04-16 10:08   ` Gleb Natapov
2012-04-16 11:09     ` Michael S. Tsirkin
2012-04-16 11:24       ` Gleb Natapov
2012-04-16 12:18         ` Michael S. Tsirkin
2012-04-16 12:30           ` Gleb Natapov
2012-04-16 13:13             ` Michael S. Tsirkin
2012-04-16 15:10               ` Gleb Natapov
2012-04-16 16:33                 ` Michael S. Tsirkin
2012-04-16 17:51                   ` Gleb Natapov
2012-04-16 19:01                     ` Michael S. Tsirkin
2012-04-17  8:45                       ` Gleb Natapov
2012-04-16 17:24                 ` Michael S. Tsirkin
2012-04-16 17:37                   ` Gleb Natapov
2012-04-16 18:56                     ` Michael S. Tsirkin
2012-04-17  8:59                       ` Gleb Natapov
2012-04-17  9:24           ` Avi Kivity
2012-04-17  9:22     ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F843DAA.1000808@redhat.com \
    --to=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mst@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.