From: Gleb Natapov <gleb@redhat.com>
To: "Michael S. Tsirkin" <mst@redhat.com>
Cc: x86@kernel.org, kvm@vger.kernel.org,
Ingo Molnar <mingo@redhat.com>, "H. Peter Anvin" <hpa@zytor.com>,
Avi Kivity <avi@redhat.com>,
Marcelo Tosatti <mtosatti@redhat.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH RFC dontapply 5/5] kvm_para: guest side for eoi avoidance
Date: Tue, 24 Apr 2012 10:07:15 +0300 [thread overview]
Message-ID: <20120424070715.GH15413@redhat.com> (raw)
In-Reply-To: <20120424065834.GA23957@redhat.com>
On Tue, Apr 24, 2012 at 09:58:35AM +0300, Michael S. Tsirkin wrote:
> On Tue, Apr 24, 2012 at 09:50:02AM +0300, Gleb Natapov wrote:
> > On Mon, Apr 23, 2012 at 05:04:40PM +0300, Michael S. Tsirkin wrote:
> > > The idea is simple: there's a bit, per APIC, in guest memory,
> > > that tells the guest that it does not need EOI.
> > > Guest tests it using a single est and clear operation - this is
> > > necessary so that host can detect interrupt nesting - and if set, it can
> > > skip the EOI MSR.
> > >
> > > This patch is not final yet, pls don't apply
> > > until host side is also finalized. Including
> > > it here for completeness to show another user
> > > of the new eoi_write interface.
> > >
> > > Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
>
> Just pointing out the above: this last patch in the
> series is for illustrative purposes at this point.
>
> > > ---
> > > arch/x86/include/asm/bitops.h | 6 +++-
> > > arch/x86/include/asm/kvm_para.h | 2 +
> > > arch/x86/kernel/kvm.c | 51 ++++++++++++++++++++++++++++++++++++--
> > > 3 files changed, 54 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
> > > index b97596e..c9c70ea 100644
> > > --- a/arch/x86/include/asm/bitops.h
> > > +++ b/arch/x86/include/asm/bitops.h
> > > @@ -26,11 +26,13 @@
> > > #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
> > > /* Technically wrong, but this avoids compilation errors on some gcc
> > > versions. */
> > > -#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
> > > +#define BITOP_ADDR_CONSTRAINT "=m"
> > > #else
> > > -#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
> > > +#define BITOP_ADDR_CONSTRAINT "+m"
> > > #endif
> > >
> > > +#define BITOP_ADDR(x) BITOP_ADDR_CONSTRAINT (*(volatile long *) (x))
> > > +
> > > #define ADDR BITOP_ADDR(addr)
> > >
> > > /*
> > > diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
> > > index 734c376..195840d 100644
> > > --- a/arch/x86/include/asm/kvm_para.h
> > > +++ b/arch/x86/include/asm/kvm_para.h
> > > @@ -22,6 +22,7 @@
> > > #define KVM_FEATURE_CLOCKSOURCE2 3
> > > #define KVM_FEATURE_ASYNC_PF 4
> > > #define KVM_FEATURE_STEAL_TIME 5
> > > +#define KVM_FEATURE_EOI 6
> > >
> > > /* The last 8 bits are used to indicate how to interpret the flags field
> > > * in pvclock structure. If no bits are set, all flags are ignored.
> > > @@ -37,6 +38,7 @@
> > > #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
> > > #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
> > > #define MSR_KVM_STEAL_TIME 0x4b564d03
> > > +#define MSR_KVM_EOI_EN 0x4b564d04
> > >
> > > struct kvm_steal_time {
> > > __u64 steal;
> > > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> > > index b8ba6e4..ad66cdd 100644
> > > --- a/arch/x86/kernel/kvm.c
> > > +++ b/arch/x86/kernel/kvm.c
> > > @@ -39,6 +39,8 @@
> > > #include <asm/desc.h>
> > > #include <asm/tlbflush.h>
> > > #include <asm/idle.h>
> > > +#include <asm/apic.h>
> > > +#include <asm/apicdef.h>
> > >
> > > static int kvmapf = 1;
> > >
> > > @@ -290,6 +292,27 @@ static void kvm_register_steal_time(void)
> > > cpu, __pa(st));
> > > }
> > >
> > > +static DEFINE_PER_CPU(u16, kvm_apic_eoi) __aligned(2) = 0;
> > > +
> > > +/* Our own copy of __test_and_clear_bit to make sure
> > > + * it is done with a single instruction */
> > > +static inline int kvm_test_and_clear_bit(int nr, volatile u16* addr)
> > > +{
> > > + int oldbit;
> > > +
> > > + asm volatile("btr %2,%1\n\t"
> > > + "sbb %0,%0"
> > > + : "=r" (oldbit), BITOP_ADDR_CONSTRAINT(*addr) : "Ir" (nr));
> > > + return oldbit;
> > > +}
> > > +
> > > +static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
> > > +{
> > > + if (kvm_test_and_clear_bit(0, &__get_cpu_var(kvm_apic_eoi)))
> > > + return;
> > > + apic->write(APIC_EOI, APIC_EOI_ACK);
> > > +}
> > > +
> > > void __cpuinit kvm_guest_cpu_init(void)
> > > {
> > > if (!kvm_para_available())
> > > @@ -307,11 +330,17 @@ void __cpuinit kvm_guest_cpu_init(void)
> > > smp_processor_id());
> > > }
> > >
> > > + if (kvm_para_has_feature(KVM_FEATURE_EOI)) {
> > > + __get_cpu_var(kvm_apic_eoi) = 0;
> > > + wrmsrl(MSR_KVM_EOI_EN, __pa(&__get_cpu_var(kvm_apic_eoi)) |
> > > + KVM_MSR_ENABLED);
> > > + }
> > > +
> > > if (has_steal_clock)
> > > kvm_register_steal_time();
> > > }
> > >
> > > -static void kvm_pv_disable_apf(void *unused)
> > > +static void kvm_pv_disable_apf(void)
> > > {
> > > if (!__get_cpu_var(apf_reason).enabled)
> > > return;
> > > @@ -323,11 +352,18 @@ static void kvm_pv_disable_apf(void *unused)
> > > smp_processor_id());
> > > }
> > >
> > > +static void kvm_pv_guest_cpu_reboot(void *unused)
> > > +{
> > > + if (kvm_para_has_feature(KVM_FEATURE_EOI))
> > > + wrmsrl(MSR_KVM_EOI_EN, 0);
> > > + kvm_pv_disable_apf();
> > > +}
> > > +
> > > static int kvm_pv_reboot_notify(struct notifier_block *nb,
> > > unsigned long code, void *unused)
> > > {
> > > if (code == SYS_RESTART)
> > > - on_each_cpu(kvm_pv_disable_apf, NULL, 1);
> > > + on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
> > > return NOTIFY_DONE;
> > > }
> > >
> > > @@ -378,7 +414,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
> > > static void kvm_guest_cpu_offline(void *dummy)
> > > {
> > > kvm_disable_steal_time();
> > > - kvm_pv_disable_apf(NULL);
> > > + if (kvm_para_has_feature(KVM_FEATURE_EOI))
> > > + wrmsrl(MSR_KVM_EOI_EN, 0);
> > > + kvm_pv_disable_apf();
> > > apf_task_wake_all();
> > > }
> > >
> > > @@ -431,6 +469,13 @@ void __init kvm_guest_init(void)
> > > pv_time_ops.steal_clock = kvm_steal_clock;
> > > }
> > >
> > > + if (kvm_para_has_feature(KVM_FEATURE_EOI)) {
> > > + /* Should happen once after apic is initialized */
> > > + BUG_ON(!apic);
> > What if kernel was started with noapic?
>
> I didn't test this yet. I thought noapic sets the apic pointer to
> apic_noop, not to NULL.
Actually it is nolapic and it exists only for 32bit kernels.
>
> So this case will get slowed down slightly if it
> actually runs on kvm. It does not look like noapic
> guest is worth optimising for.
>
It is not. Just make sure it does not trigger the assertion.
> > > + BUG_ON(apic->eoi_write == kvm_guest_apic_eoi_write);
> > > + apic->eoi_write = kvm_guest_apic_eoi_write;
> > > + }
> > > +
> > > #ifdef CONFIG_SMP
> > > smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
> > > register_cpu_notifier(&kvm_cpu_notifier);
> > > --
> > > 1.7.9.111.gf3fb0
> >
> > --
> > Gleb.
--
Gleb.
next prev parent reply other threads:[~2012-04-24 7:07 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-04-23 14:03 [PATCH RFC 0/5] apic: eoi optimization support Michael S. Tsirkin
2012-04-23 14:04 ` [PATCH RFC 1/5] apic: fix typo EIO_ACK -> EOI_ACK and document Michael S. Tsirkin
2012-04-23 14:04 ` [PATCH RFC 2/5] apic: use symbolic APIC_EOI_ACK Michael S. Tsirkin
2012-04-23 14:04 ` [PATCH RFC 3/5] x86: add apic->eoi_write callback Michael S. Tsirkin
2012-04-23 14:04 ` [PATCH RFC 4/5] x86: eoi micro-optimization Michael S. Tsirkin
2012-04-23 14:04 ` [PATCH RFC dontapply 5/5] kvm_para: guest side for eoi avoidance Michael S. Tsirkin
2012-04-24 6:50 ` Gleb Natapov
2012-04-24 6:58 ` Michael S. Tsirkin
2012-04-24 7:07 ` Gleb Natapov [this message]
2012-05-08 15:26 ` Paolo Bonzini
2012-05-08 15:28 ` Gleb Natapov
2012-05-08 15:45 ` H. Peter Anvin
2012-05-08 16:32 ` Gleb Natapov
2012-05-08 16:57 ` Michael S. Tsirkin
2012-05-08 18:06 ` H. Peter Anvin
2012-05-08 19:36 ` Michael S. Tsirkin
2012-05-07 10:35 ` [PATCH RFC 0/5] apic: eoi optimization support Ingo Molnar
2012-05-07 10:59 ` Michael S. Tsirkin
2012-05-07 11:40 ` Ingo Molnar
2012-05-07 11:47 ` Avi Kivity
2012-05-07 11:57 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120424070715.GH15413@redhat.com \
--to=gleb@redhat.com \
--cc=avi@redhat.com \
--cc=hpa@zytor.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=mst@redhat.com \
--cc=mtosatti@redhat.com \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.