* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
@ 2011-09-05 22:45 ` Alexander Graf
2011-09-06 3:17 ` Liu Yu-B13201
` (19 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Alexander Graf @ 2011-09-05 22:45 UTC (permalink / raw)
To: kvm-ppc
On 27.08.2011, at 01:31, Scott Wood wrote:
> From: Liu Yu <yu.liu@freescale.com>
>
> Decrementers are now properly driven by TCR/TSR, and the guest
> has full read/write access to these registers.
>
> The decrementer keeps ticking (and setting the TSR bit) regardless of
> whether the interrupts are enabled with TCR.
>
> The decrementer stops at zero, rather than going negative.
>
> Signed-off-by: Liu Yu <yu.liu@freescale.com>
> [scott: added dequeue in kvmppc_booke_irqprio_deliver, and dec stop-at-zero]
> Signed-off-by: Scott Wood <scottwood@freescale.com>
> ---
> arch/powerpc/include/asm/kvm_host.h | 2 +-
> arch/powerpc/include/asm/kvm_ppc.h | 11 +++++
> arch/powerpc/kvm/book3s.c | 8 ++++
> arch/powerpc/kvm/booke.c | 80 +++++++++++++++++++++++++++--------
> arch/powerpc/kvm/booke.h | 4 ++
> arch/powerpc/kvm/booke_emulate.c | 11 ++++-
> arch/powerpc/kvm/emulate.c | 45 ++++++++-----------
> arch/powerpc/kvm/powerpc.c | 20 +--------
> 8 files changed, 114 insertions(+), 67 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 3305af4..ea08c79 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -334,7 +334,7 @@ struct kvm_vcpu_arch {
> u32 tbl;
> u32 tbu;
> u32 tcr;
> - u32 tsr;
> + ulong tsr; /* we need to perform set/clr_bits() which requires ulong */
> u32 ivor[64];
> ulong ivpr;
> u32 pvr;
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index bdaa6c8..ddaa615 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -66,6 +66,7 @@ extern int kvmppc_emulate_instruction(struct kvm_run *run,
> extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> +extern void kvmppc_decrementer_func(unsigned long data);
>
> /* Core-specific hooks */
>
> @@ -197,4 +198,14 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
> int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
> struct kvm_dirty_tlb *cfg);
>
> +static inline void kvmppc_wakeup_vcpu(struct kvm_vcpu *vcpu)
> +{
> + if (waitqueue_active(&vcpu->wq)) {
> + wake_up_interruptible(&vcpu->wq);
> + vcpu->stat.halt_wakeup++;
> + } else if (vcpu->cpu != -1) {
> + smp_send_reschedule(vcpu->cpu);
> + }
> +}
> +
> #endif /* __POWERPC_KVM_PPC_H__ */
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index f68a34d..b057856 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -514,3 +514,11 @@ out:
> mutex_unlock(&kvm->slots_lock);
> return r;
> }
> +
> +void kvmppc_decrementer_func(unsigned long data)
> +{
> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> +
> + kvmppc_core_queue_dec(vcpu);
> + kvmppc_wakeup_vcpu(vcpu);
> +}
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index 0ed62c1..502f9ff 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -205,7 +205,8 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> unsigned int priority)
> {
> - int allowed = 0;
> + int allowed = 1;
> + int dequeue = 0;
> ulong uninitialized_var(msr_mask);
> bool update_esr = false, update_dear = false;
> ulong crit_raw = vcpu->arch.shared->critical;
> @@ -258,10 +259,15 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> allowed = vcpu->arch.shared->msr & MSR_ME;
> msr_mask = 0;
> break;
> - case BOOKE_IRQPRIO_EXTERNAL:
> case BOOKE_IRQPRIO_DECREMENTER:
> + if (!(vcpu->arch.tcr & TCR_DIE)) {
> + allowed = 0;
> + dequeue = 1;
> + }
> + /* fall through */
> + case BOOKE_IRQPRIO_EXTERNAL:
> case BOOKE_IRQPRIO_FIT:
> - allowed = vcpu->arch.shared->msr & MSR_EE;
> + allowed = allowed && vcpu->arch.shared->msr & MSR_EE;
> allowed = allowed && !crit;
> msr_mask = MSR_CE|MSR_ME|MSR_DE;
> break;
> @@ -269,6 +275,8 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> allowed = vcpu->arch.shared->msr & MSR_DE;
> msr_mask = MSR_ME;
> break;
> + default:
> + allowed = 0;
> }
>
> if (allowed) {
> @@ -283,6 +291,9 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>
> if (!keep_irq)
> clear_bit(priority, &vcpu->arch.pending_exceptions);
> + } else if (dequeue) {
> + /* Should only happen due to races with masking */
> + clear_bit(priority, &vcpu->arch.pending_exceptions);
> }
>
> return allowed;
> @@ -721,25 +732,16 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
> vcpu->arch.shared->esr = sregs->u.e.esr;
> vcpu->arch.shared->dar = sregs->u.e.dear;
> vcpu->arch.vrsave = sregs->u.e.vrsave;
> - vcpu->arch.tcr = sregs->u.e.tcr;
> + kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
>
> - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
> + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
> vcpu->arch.dec = sregs->u.e.dec;
> -
> - kvmppc_emulate_dec(vcpu);
> + kvmppc_emulate_dec(vcpu);
> + }
>
> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> - /*
> - * FIXME: existing KVM timer handling is incomplete.
> - * TSR cannot be read by the guest, and its value in
> - * vcpu->arch is always zero. For now, just handle
> - * the case where the caller is trying to inject a
> - * decrementer interrupt.
> - */
> -
> - if ((sregs->u.e.tsr & TSR_DIS) &&
> - (vcpu->arch.tcr & TCR_DIE))
> - kvmppc_core_queue_dec(vcpu);
> + kvmppc_clr_tsr_bits(vcpu, ~0);
> + kvmppc_set_tsr_bits(vcpu, sregs->u.e.tsr);
> }
>
> return 0;
> @@ -895,6 +897,48 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
> {
> }
>
> +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
> +{
> + vcpu->arch.tcr = new_tcr;
> + smp_wmb();
> +
> + /*
> + * Since TCR changed, we need to check
> + * if blocked interrupts are deliverable.
> + */
> + if ((new_tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) {
> + kvmppc_core_queue_dec(vcpu);
> + kvmppc_wakeup_vcpu(vcpu);
> + }
> +}
> +
> +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
> +{
> + set_bits(tsr_bits, &vcpu->arch.tsr);
I'm still missing the point why you need set_bits/clear_bits. Can't you just use |= and &= ~ like everyone else? :)
> + smp_wmb();
> +
> + if ((tsr_bits & TSR_DIS) && (vcpu->arch.tcr & TCR_DIE)) {
> + kvmppc_core_queue_dec(vcpu);
> + kvmppc_wakeup_vcpu(vcpu);
> + }
Please combine into a function and call that for updating tsr and tcr.
Alex
^ permalink raw reply [flat|nested] 22+ messages in thread* RE: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
2011-09-05 22:45 ` Alexander Graf
@ 2011-09-06 3:17 ` Liu Yu-B13201
2011-09-06 8:04 ` Alexander Graf
` (18 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Liu Yu-B13201 @ 2011-09-06 3:17 UTC (permalink / raw)
To: kvm-ppc
> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org
> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> Sent: Tuesday, September 06, 2011 6:45 AM
> To: Wood Scott-B07421
> Cc: kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
> register emulation
>
>
> On 27.08.2011, at 01:31, Scott Wood wrote:
>
> > From: Liu Yu <yu.liu@freescale.com>
> >
> > Decrementers are now properly driven by TCR/TSR, and the guest
> > has full read/write access to these registers.
> >
> > The decrementer keeps ticking (and setting the TSR bit)
> regardless of
> > whether the interrupts are enabled with TCR.
> >
> > The decrementer stops at zero, rather than going negative.
> >
> > Signed-off-by: Liu Yu <yu.liu@freescale.com>
> > [scott: added dequeue in kvmppc_booke_irqprio_deliver, and
> dec stop-at-zero]
> > Signed-off-by: Scott Wood <scottwood@freescale.com>
> > ---
> > arch/powerpc/include/asm/kvm_host.h | 2 +-
> > arch/powerpc/include/asm/kvm_ppc.h | 11 +++++
> > arch/powerpc/kvm/book3s.c | 8 ++++
> > arch/powerpc/kvm/booke.c | 80
> +++++++++++++++++++++++++++--------
> > arch/powerpc/kvm/booke.h | 4 ++
> > arch/powerpc/kvm/booke_emulate.c | 11 ++++-
> > arch/powerpc/kvm/emulate.c | 45 ++++++++-----------
> > arch/powerpc/kvm/powerpc.c | 20 +--------
> > 8 files changed, 114 insertions(+), 67 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/kvm_host.h
> b/arch/powerpc/include/asm/kvm_host.h
> > index 3305af4..ea08c79 100644
> > --- a/arch/powerpc/include/asm/kvm_host.h
> > +++ b/arch/powerpc/include/asm/kvm_host.h
> > @@ -334,7 +334,7 @@ struct kvm_vcpu_arch {
> > u32 tbl;
> > u32 tbu;
> > u32 tcr;
> > - u32 tsr;
> > + ulong tsr; /* we need to perform set/clr_bits() which
> requires ulong */
> > u32 ivor[64];
> > ulong ivpr;
> > u32 pvr;
> > diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> b/arch/powerpc/include/asm/kvm_ppc.h
> > index bdaa6c8..ddaa615 100644
> > --- a/arch/powerpc/include/asm/kvm_ppc.h
> > +++ b/arch/powerpc/include/asm/kvm_ppc.h
> > @@ -66,6 +66,7 @@ extern int
> kvmppc_emulate_instruction(struct kvm_run *run,
> > extern int kvmppc_emulate_mmio(struct kvm_run *run, struct
> kvm_vcpu *vcpu);
> > extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> > extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> > +extern void kvmppc_decrementer_func(unsigned long data);
> >
> > /* Core-specific hooks */
> >
> > @@ -197,4 +198,14 @@ int kvm_vcpu_ioctl_config_tlb(struct
> kvm_vcpu *vcpu,
> > int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
> > struct kvm_dirty_tlb *cfg);
> >
> > +static inline void kvmppc_wakeup_vcpu(struct kvm_vcpu *vcpu)
> > +{
> > + if (waitqueue_active(&vcpu->wq)) {
> > + wake_up_interruptible(&vcpu->wq);
> > + vcpu->stat.halt_wakeup++;
> > + } else if (vcpu->cpu != -1) {
> > + smp_send_reschedule(vcpu->cpu);
> > + }
> > +}
> > +
> > #endif /* __POWERPC_KVM_PPC_H__ */
> > diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> > index f68a34d..b057856 100644
> > --- a/arch/powerpc/kvm/book3s.c
> > +++ b/arch/powerpc/kvm/book3s.c
> > @@ -514,3 +514,11 @@ out:
> > mutex_unlock(&kvm->slots_lock);
> > return r;
> > }
> > +
> > +void kvmppc_decrementer_func(unsigned long data)
> > +{
> > + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> > +
> > + kvmppc_core_queue_dec(vcpu);
> > + kvmppc_wakeup_vcpu(vcpu);
> > +}
> > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> > index 0ed62c1..502f9ff 100644
> > --- a/arch/powerpc/kvm/booke.c
> > +++ b/arch/powerpc/kvm/booke.c
> > @@ -205,7 +205,8 @@ void
> kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> > static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> > unsigned int priority)
> > {
> > - int allowed = 0;
> > + int allowed = 1;
> > + int dequeue = 0;
> > ulong uninitialized_var(msr_mask);
> > bool update_esr = false, update_dear = false;
> > ulong crit_raw = vcpu->arch.shared->critical;
> > @@ -258,10 +259,15 @@ static int
> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> > allowed = vcpu->arch.shared->msr & MSR_ME;
> > msr_mask = 0;
> > break;
> > - case BOOKE_IRQPRIO_EXTERNAL:
> > case BOOKE_IRQPRIO_DECREMENTER:
> > + if (!(vcpu->arch.tcr & TCR_DIE)) {
> > + allowed = 0;
> > + dequeue = 1;
> > + }
> > + /* fall through */
> > + case BOOKE_IRQPRIO_EXTERNAL:
> > case BOOKE_IRQPRIO_FIT:
> > - allowed = vcpu->arch.shared->msr & MSR_EE;
> > + allowed = allowed && vcpu->arch.shared->msr & MSR_EE;
> > allowed = allowed && !crit;
> > msr_mask = MSR_CE|MSR_ME|MSR_DE;
> > break;
> > @@ -269,6 +275,8 @@ static int
> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> > allowed = vcpu->arch.shared->msr & MSR_DE;
> > msr_mask = MSR_ME;
> > break;
> > + default:
> > + allowed = 0;
> > }
> >
> > if (allowed) {
> > @@ -283,6 +291,9 @@ static int
> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >
> > if (!keep_irq)
> > clear_bit(priority,
> &vcpu->arch.pending_exceptions);
> > + } else if (dequeue) {
> > + /* Should only happen due to races with masking */
> > + clear_bit(priority, &vcpu->arch.pending_exceptions);
> > }
> >
> > return allowed;
> > @@ -721,25 +732,16 @@ static int set_sregs_base(struct
> kvm_vcpu *vcpu,
> > vcpu->arch.shared->esr = sregs->u.e.esr;
> > vcpu->arch.shared->dar = sregs->u.e.dear;
> > vcpu->arch.vrsave = sregs->u.e.vrsave;
> > - vcpu->arch.tcr = sregs->u.e.tcr;
> > + kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
> >
> > - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
> > + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
> > vcpu->arch.dec = sregs->u.e.dec;
> > -
> > - kvmppc_emulate_dec(vcpu);
> > + kvmppc_emulate_dec(vcpu);
> > + }
> >
> > if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> > - /*
> > - * FIXME: existing KVM timer handling is incomplete.
> > - * TSR cannot be read by the guest, and its value in
> > - * vcpu->arch is always zero. For now, just handle
> > - * the case where the caller is trying to inject a
> > - * decrementer interrupt.
> > - */
> > -
> > - if ((sregs->u.e.tsr & TSR_DIS) &&
> > - (vcpu->arch.tcr & TCR_DIE))
> > - kvmppc_core_queue_dec(vcpu);
> > + kvmppc_clr_tsr_bits(vcpu, ~0);
> > + kvmppc_set_tsr_bits(vcpu, sregs->u.e.tsr);
> > }
> >
> > return 0;
> > @@ -895,6 +897,48 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
> > {
> > }
> >
> > +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
> > +{
> > + vcpu->arch.tcr = new_tcr;
> > + smp_wmb();
> > +
> > + /*
> > + * Since TCR changed, we need to check
> > + * if blocked interrupts are deliverable.
> > + */
> > + if ((new_tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) {
> > + kvmppc_core_queue_dec(vcpu);
> > + kvmppc_wakeup_vcpu(vcpu);
> > + }
> > +}
> > +
> > +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
> > +{
> > + set_bits(tsr_bits, &vcpu->arch.tsr);
>
> I'm still missing the point why you need set_bits/clear_bits.
> Can't you just use |= and &= ~ like everyone else? :)
>
Because set_bits/clear_bits is atomic.
set/clr_tsr_bits() may be called in different threads so that on different cores.
We need to make sure modification to be atomic to prevent missing changes.
Thanks,
Yu
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
2011-09-05 22:45 ` Alexander Graf
2011-09-06 3:17 ` Liu Yu-B13201
@ 2011-09-06 8:04 ` Alexander Graf
2011-09-06 19:05 ` Alexander Graf
` (17 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Alexander Graf @ 2011-09-06 8:04 UTC (permalink / raw)
To: kvm-ppc
On 06.09.2011, at 05:17, Liu Yu-B13201 wrote:
>
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org
>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>> Sent: Tuesday, September 06, 2011 6:45 AM
>> To: Wood Scott-B07421
>> Cc: kvm-ppc@vger.kernel.org
>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
>> register emulation
>>
>>
>> On 27.08.2011, at 01:31, Scott Wood wrote:
>>
>>> From: Liu Yu <yu.liu@freescale.com>
>>>
>>> Decrementers are now properly driven by TCR/TSR, and the guest
>>> has full read/write access to these registers.
>>>
>>> The decrementer keeps ticking (and setting the TSR bit)
>> regardless of
>>> whether the interrupts are enabled with TCR.
>>>
>>> The decrementer stops at zero, rather than going negative.
>>>
>>> Signed-off-by: Liu Yu <yu.liu@freescale.com>
>>> [scott: added dequeue in kvmppc_booke_irqprio_deliver, and
>> dec stop-at-zero]
>>> Signed-off-by: Scott Wood <scottwood@freescale.com>
>>> ---
>>> arch/powerpc/include/asm/kvm_host.h | 2 +-
>>> arch/powerpc/include/asm/kvm_ppc.h | 11 +++++
>>> arch/powerpc/kvm/book3s.c | 8 ++++
>>> arch/powerpc/kvm/booke.c | 80
>> +++++++++++++++++++++++++++--------
>>> arch/powerpc/kvm/booke.h | 4 ++
>>> arch/powerpc/kvm/booke_emulate.c | 11 ++++-
>>> arch/powerpc/kvm/emulate.c | 45 ++++++++-----------
>>> arch/powerpc/kvm/powerpc.c | 20 +--------
>>> 8 files changed, 114 insertions(+), 67 deletions(-)
>>>
>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>> b/arch/powerpc/include/asm/kvm_host.h
>>> index 3305af4..ea08c79 100644
>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>> @@ -334,7 +334,7 @@ struct kvm_vcpu_arch {
>>> u32 tbl;
>>> u32 tbu;
>>> u32 tcr;
>>> - u32 tsr;
>>> + ulong tsr; /* we need to perform set/clr_bits() which
>> requires ulong */
>>> u32 ivor[64];
>>> ulong ivpr;
>>> u32 pvr;
>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
>> b/arch/powerpc/include/asm/kvm_ppc.h
>>> index bdaa6c8..ddaa615 100644
>>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>>> @@ -66,6 +66,7 @@ extern int
>> kvmppc_emulate_instruction(struct kvm_run *run,
>>> extern int kvmppc_emulate_mmio(struct kvm_run *run, struct
>> kvm_vcpu *vcpu);
>>> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
>>> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
>>> +extern void kvmppc_decrementer_func(unsigned long data);
>>>
>>> /* Core-specific hooks */
>>>
>>> @@ -197,4 +198,14 @@ int kvm_vcpu_ioctl_config_tlb(struct
>> kvm_vcpu *vcpu,
>>> int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
>>> struct kvm_dirty_tlb *cfg);
>>>
>>> +static inline void kvmppc_wakeup_vcpu(struct kvm_vcpu *vcpu)
>>> +{
>>> + if (waitqueue_active(&vcpu->wq)) {
>>> + wake_up_interruptible(&vcpu->wq);
>>> + vcpu->stat.halt_wakeup++;
>>> + } else if (vcpu->cpu != -1) {
>>> + smp_send_reschedule(vcpu->cpu);
>>> + }
>>> +}
>>> +
>>> #endif /* __POWERPC_KVM_PPC_H__ */
>>> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
>>> index f68a34d..b057856 100644
>>> --- a/arch/powerpc/kvm/book3s.c
>>> +++ b/arch/powerpc/kvm/book3s.c
>>> @@ -514,3 +514,11 @@ out:
>>> mutex_unlock(&kvm->slots_lock);
>>> return r;
>>> }
>>> +
>>> +void kvmppc_decrementer_func(unsigned long data)
>>> +{
>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>> +
>>> + kvmppc_core_queue_dec(vcpu);
>>> + kvmppc_wakeup_vcpu(vcpu);
>>> +}
>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>> index 0ed62c1..502f9ff 100644
>>> --- a/arch/powerpc/kvm/booke.c
>>> +++ b/arch/powerpc/kvm/booke.c
>>> @@ -205,7 +205,8 @@ void
>> kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
>>> static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>> unsigned int priority)
>>> {
>>> - int allowed = 0;
>>> + int allowed = 1;
>>> + int dequeue = 0;
>>> ulong uninitialized_var(msr_mask);
>>> bool update_esr = false, update_dear = false;
>>> ulong crit_raw = vcpu->arch.shared->critical;
>>> @@ -258,10 +259,15 @@ static int
>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>> allowed = vcpu->arch.shared->msr & MSR_ME;
>>> msr_mask = 0;
>>> break;
>>> - case BOOKE_IRQPRIO_EXTERNAL:
>>> case BOOKE_IRQPRIO_DECREMENTER:
>>> + if (!(vcpu->arch.tcr & TCR_DIE)) {
>>> + allowed = 0;
>>> + dequeue = 1;
>>> + }
>>> + /* fall through */
>>> + case BOOKE_IRQPRIO_EXTERNAL:
>>> case BOOKE_IRQPRIO_FIT:
>>> - allowed = vcpu->arch.shared->msr & MSR_EE;
>>> + allowed = allowed && vcpu->arch.shared->msr & MSR_EE;
>>> allowed = allowed && !crit;
>>> msr_mask = MSR_CE|MSR_ME|MSR_DE;
>>> break;
>>> @@ -269,6 +275,8 @@ static int
>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>> allowed = vcpu->arch.shared->msr & MSR_DE;
>>> msr_mask = MSR_ME;
>>> break;
>>> + default:
>>> + allowed = 0;
>>> }
>>>
>>> if (allowed) {
>>> @@ -283,6 +291,9 @@ static int
>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>>
>>> if (!keep_irq)
>>> clear_bit(priority,
>> &vcpu->arch.pending_exceptions);
>>> + } else if (dequeue) {
>>> + /* Should only happen due to races with masking */
>>> + clear_bit(priority, &vcpu->arch.pending_exceptions);
>>> }
>>>
>>> return allowed;
>>> @@ -721,25 +732,16 @@ static int set_sregs_base(struct
>> kvm_vcpu *vcpu,
>>> vcpu->arch.shared->esr = sregs->u.e.esr;
>>> vcpu->arch.shared->dar = sregs->u.e.dear;
>>> vcpu->arch.vrsave = sregs->u.e.vrsave;
>>> - vcpu->arch.tcr = sregs->u.e.tcr;
>>> + kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
>>>
>>> - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
>>> + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
>>> vcpu->arch.dec = sregs->u.e.dec;
>>> -
>>> - kvmppc_emulate_dec(vcpu);
>>> + kvmppc_emulate_dec(vcpu);
>>> + }
>>>
>>> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
>>> - /*
>>> - * FIXME: existing KVM timer handling is incomplete.
>>> - * TSR cannot be read by the guest, and its value in
>>> - * vcpu->arch is always zero. For now, just handle
>>> - * the case where the caller is trying to inject a
>>> - * decrementer interrupt.
>>> - */
>>> -
>>> - if ((sregs->u.e.tsr & TSR_DIS) &&
>>> - (vcpu->arch.tcr & TCR_DIE))
>>> - kvmppc_core_queue_dec(vcpu);
>>> + kvmppc_clr_tsr_bits(vcpu, ~0);
>>> + kvmppc_set_tsr_bits(vcpu, sregs->u.e.tsr);
>>> }
>>>
>>> return 0;
>>> @@ -895,6 +897,48 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
>>> {
>>> }
>>>
>>> +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
>>> +{
>>> + vcpu->arch.tcr = new_tcr;
>>> + smp_wmb();
>>> +
>>> + /*
>>> + * Since TCR changed, we need to check
>>> + * if blocked interrupts are deliverable.
>>> + */
>>> + if ((new_tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) {
>>> + kvmppc_core_queue_dec(vcpu);
>>> + kvmppc_wakeup_vcpu(vcpu);
>>> + }
>>> +}
>>> +
>>> +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
>>> +{
>>> + set_bits(tsr_bits, &vcpu->arch.tsr);
>>
>> I'm still missing the point why you need set_bits/clear_bits.
>> Can't you just use |= and &= ~ like everyone else? :)
>>
>
> Because set_bits/clear_bits is atomic.
> set/clr_tsr_bits() may be called in different threads so that on different cores.
> We need to make sure modification to be atomic to prevent missing changes.
Ah, so you can set the TSR bit in the decrementer timer callback? Hrm. Please add a comment there saying so. All the other cases should be under vcpu_load() scope. If they're not, we have bugs in our code :). So they shouldn't need atomic operations (except for now they do because there's one user that doesn't do vcpu_load).
Alex
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (2 preceding siblings ...)
2011-09-06 8:04 ` Alexander Graf
@ 2011-09-06 19:05 ` Alexander Graf
2011-09-07 10:16 ` Liu Yu-B13201
` (16 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Alexander Graf @ 2011-09-06 19:05 UTC (permalink / raw)
To: kvm-ppc
Am 06.09.2011 um 10:04 schrieb Alexander Graf <agraf@suse.de>:
>
> On 06.09.2011, at 05:17, Liu Yu-B13201 wrote:
>
>>
>>
>>> -----Original Message-----
>>> From: kvm-ppc-owner@vger.kernel.org
>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>> Sent: Tuesday, September 06, 2011 6:45 AM
>>> To: Wood Scott-B07421
>>> Cc: kvm-ppc@vger.kernel.org
>>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
>>> register emulation
>>>
>>>
>>> On 27.08.2011, at 01:31, Scott Wood wrote:
>>>
>>>> From: Liu Yu <yu.liu@freescale.com>
>>>>
>>>> Decrementers are now properly driven by TCR/TSR, and the guest
>>>> has full read/write access to these registers.
>>>>
>>>> The decrementer keeps ticking (and setting the TSR bit)
>>> regardless of
>>>> whether the interrupts are enabled with TCR.
>>>>
>>>> The decrementer stops at zero, rather than going negative.
>>>>
>>>> Signed-off-by: Liu Yu <yu.liu@freescale.com>
>>>> [scott: added dequeue in kvmppc_booke_irqprio_deliver, and
>>> dec stop-at-zero]
>>>> Signed-off-by: Scott Wood <scottwood@freescale.com>
>>>> ---
>>>> arch/powerpc/include/asm/kvm_host.h | 2 +-
>>>> arch/powerpc/include/asm/kvm_ppc.h | 11 +++++
>>>> arch/powerpc/kvm/book3s.c | 8 ++++
>>>> arch/powerpc/kvm/booke.c | 80
>>> +++++++++++++++++++++++++++--------
>>>> arch/powerpc/kvm/booke.h | 4 ++
>>>> arch/powerpc/kvm/booke_emulate.c | 11 ++++-
>>>> arch/powerpc/kvm/emulate.c | 45 ++++++++-----------
>>>> arch/powerpc/kvm/powerpc.c | 20 +--------
>>>> 8 files changed, 114 insertions(+), 67 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>>> b/arch/powerpc/include/asm/kvm_host.h
>>>> index 3305af4..ea08c79 100644
>>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>>> @@ -334,7 +334,7 @@ struct kvm_vcpu_arch {
>>>> u32 tbl;
>>>> u32 tbu;
>>>> u32 tcr;
>>>> - u32 tsr;
>>>> + ulong tsr; /* we need to perform set/clr_bits() which
>>> requires ulong */
>>>> u32 ivor[64];
>>>> ulong ivpr;
>>>> u32 pvr;
>>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
>>> b/arch/powerpc/include/asm/kvm_ppc.h
>>>> index bdaa6c8..ddaa615 100644
>>>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>>>> @@ -66,6 +66,7 @@ extern int
>>> kvmppc_emulate_instruction(struct kvm_run *run,
>>>> extern int kvmppc_emulate_mmio(struct kvm_run *run, struct
>>> kvm_vcpu *vcpu);
>>>> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
>>>> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
>>>> +extern void kvmppc_decrementer_func(unsigned long data);
>>>>
>>>> /* Core-specific hooks */
>>>>
>>>> @@ -197,4 +198,14 @@ int kvm_vcpu_ioctl_config_tlb(struct
>>> kvm_vcpu *vcpu,
>>>> int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
>>>> struct kvm_dirty_tlb *cfg);
>>>>
>>>> +static inline void kvmppc_wakeup_vcpu(struct kvm_vcpu *vcpu)
>>>> +{
>>>> + if (waitqueue_active(&vcpu->wq)) {
>>>> + wake_up_interruptible(&vcpu->wq);
>>>> + vcpu->stat.halt_wakeup++;
>>>> + } else if (vcpu->cpu != -1) {
>>>> + smp_send_reschedule(vcpu->cpu);
>>>> + }
>>>> +}
>>>> +
>>>> #endif /* __POWERPC_KVM_PPC_H__ */
>>>> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
>>>> index f68a34d..b057856 100644
>>>> --- a/arch/powerpc/kvm/book3s.c
>>>> +++ b/arch/powerpc/kvm/book3s.c
>>>> @@ -514,3 +514,11 @@ out:
>>>> mutex_unlock(&kvm->slots_lock);
>>>> return r;
>>>> }
>>>> +
>>>> +void kvmppc_decrementer_func(unsigned long data)
>>>> +{
>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>>> +
>>>> + kvmppc_core_queue_dec(vcpu);
>>>> + kvmppc_wakeup_vcpu(vcpu);
>>>> +}
>>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>>> index 0ed62c1..502f9ff 100644
>>>> --- a/arch/powerpc/kvm/booke.c
>>>> +++ b/arch/powerpc/kvm/booke.c
>>>> @@ -205,7 +205,8 @@ void
>>> kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
>>>> static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>>> unsigned int priority)
>>>> {
>>>> - int allowed = 0;
>>>> + int allowed = 1;
>>>> + int dequeue = 0;
>>>> ulong uninitialized_var(msr_mask);
>>>> bool update_esr = false, update_dear = false;
>>>> ulong crit_raw = vcpu->arch.shared->critical;
>>>> @@ -258,10 +259,15 @@ static int
>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>>> allowed = vcpu->arch.shared->msr & MSR_ME;
>>>> msr_mask = 0;
>>>> break;
>>>> - case BOOKE_IRQPRIO_EXTERNAL:
>>>> case BOOKE_IRQPRIO_DECREMENTER:
>>>> + if (!(vcpu->arch.tcr & TCR_DIE)) {
>>>> + allowed = 0;
>>>> + dequeue = 1;
>>>> + }
>>>> + /* fall through */
>>>> + case BOOKE_IRQPRIO_EXTERNAL:
>>>> case BOOKE_IRQPRIO_FIT:
>>>> - allowed = vcpu->arch.shared->msr & MSR_EE;
>>>> + allowed = allowed && vcpu->arch.shared->msr & MSR_EE;
>>>> allowed = allowed && !crit;
>>>> msr_mask = MSR_CE|MSR_ME|MSR_DE;
>>>> break;
>>>> @@ -269,6 +275,8 @@ static int
>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>>> allowed = vcpu->arch.shared->msr & MSR_DE;
>>>> msr_mask = MSR_ME;
>>>> break;
>>>> + default:
>>>> + allowed = 0;
>>>> }
>>>>
>>>> if (allowed) {
>>>> @@ -283,6 +291,9 @@ static int
>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>>>
>>>> if (!keep_irq)
>>>> clear_bit(priority,
>>> &vcpu->arch.pending_exceptions);
>>>> + } else if (dequeue) {
>>>> + /* Should only happen due to races with masking */
>>>> + clear_bit(priority, &vcpu->arch.pending_exceptions);
>>>> }
>>>>
>>>> return allowed;
>>>> @@ -721,25 +732,16 @@ static int set_sregs_base(struct
>>> kvm_vcpu *vcpu,
>>>> vcpu->arch.shared->esr = sregs->u.e.esr;
>>>> vcpu->arch.shared->dar = sregs->u.e.dear;
>>>> vcpu->arch.vrsave = sregs->u.e.vrsave;
>>>> - vcpu->arch.tcr = sregs->u.e.tcr;
>>>> + kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
>>>>
>>>> - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
>>>> + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
>>>> vcpu->arch.dec = sregs->u.e.dec;
>>>> -
>>>> - kvmppc_emulate_dec(vcpu);
>>>> + kvmppc_emulate_dec(vcpu);
>>>> + }
>>>>
>>>> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
>>>> - /*
>>>> - * FIXME: existing KVM timer handling is incomplete.
>>>> - * TSR cannot be read by the guest, and its value in
>>>> - * vcpu->arch is always zero. For now, just handle
>>>> - * the case where the caller is trying to inject a
>>>> - * decrementer interrupt.
>>>> - */
>>>> -
>>>> - if ((sregs->u.e.tsr & TSR_DIS) &&
>>>> - (vcpu->arch.tcr & TCR_DIE))
>>>> - kvmppc_core_queue_dec(vcpu);
>>>> + kvmppc_clr_tsr_bits(vcpu, ~0);
>>>> + kvmppc_set_tsr_bits(vcpu, sregs->u.e.tsr);
>>>> }
>>>>
>>>> return 0;
>>>> @@ -895,6 +897,48 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
>>>> {
>>>> }
>>>>
>>>> +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
>>>> +{
>>>> + vcpu->arch.tcr = new_tcr;
>>>> + smp_wmb();
>>>> +
>>>> + /*
>>>> + * Since TCR changed, we need to check
>>>> + * if blocked interrupts are deliverable.
>>>> + */
>>>> + if ((new_tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) {
>>>> + kvmppc_core_queue_dec(vcpu);
>>>> + kvmppc_wakeup_vcpu(vcpu);
>>>> + }
>>>> +}
>>>> +
>>>> +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
>>>> +{
>>>> + set_bits(tsr_bits, &vcpu->arch.tsr);
>>>
>>> I'm still missing the point why you need set_bits/clear_bits.
>>> Can't you just use |= and &= ~ like everyone else? :)
>>>
>>
>> Because set_bits/clear_bits is atomic.
>> set/clr_tsr_bits() may be called in different threads so that on different cores.
>> We need to make sure modification to be atomic to prevent missing changes.
>
> Ah, so you can set the TSR bit in the decrementer timer callback? Hrm. Please add a comment there saying so. All the other cases should be under vcpu_load() scope. If they're not, we have bugs in our code :). So they shouldn't need atomic operations (except for now they do because there's one user that doesn't do vcpu_load).
Thinking about this for a bit more, shouldn't we be able to share the TSR with the guest so that it can clear the DEC interrupt without exiting the guest? Then we can't do ulong because that would make the pv abi 32/64 specific.
Overall, I'm also not very sure it's too useful to set TSR from non-vcpu context. After all, in most cases the guest should have interrupts enabled at that point and probably wants to receive the interrupt in that moment, not some seconds later when the next intercept occurs. So we still want to kick the vcpu thread on dec interrupts. And then we don't need anything to be atomic.
Alex
>
^ permalink raw reply [flat|nested] 22+ messages in thread* RE: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (3 preceding siblings ...)
2011-09-06 19:05 ` Alexander Graf
@ 2011-09-07 10:16 ` Liu Yu-B13201
2011-09-07 10:41 ` Alexander Graf
` (15 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Liu Yu-B13201 @ 2011-09-07 10:16 UTC (permalink / raw)
To: kvm-ppc
> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org
> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> Sent: Wednesday, September 07, 2011 3:06 AM
> To: Liu Yu-B13201
> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
> register emulation
>
>
>
> Am 06.09.2011 um 10:04 schrieb Alexander Graf <agraf@suse.de>:
>
> >
> > On 06.09.2011, at 05:17, Liu Yu-B13201 wrote:
> >
> >>
> >>
> >>> -----Original Message-----
> >>> From: kvm-ppc-owner@vger.kernel.org
> >>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> >>> Sent: Tuesday, September 06, 2011 6:45 AM
> >>> To: Wood Scott-B07421
> >>> Cc: kvm-ppc@vger.kernel.org
> >>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
> >>> register emulation
> >>>
> >>>
> >>> On 27.08.2011, at 01:31, Scott Wood wrote:
> >>>
> >>>> From: Liu Yu <yu.liu@freescale.com>
> >>>>
> >>>> Decrementers are now properly driven by TCR/TSR, and the guest
> >>>> has full read/write access to these registers.
> >>>>
> >>>> The decrementer keeps ticking (and setting the TSR bit)
> >>> regardless of
> >>>> whether the interrupts are enabled with TCR.
> >>>>
> >>>> The decrementer stops at zero, rather than going negative.
> >>>>
> >>>> Signed-off-by: Liu Yu <yu.liu@freescale.com>
> >>>> [scott: added dequeue in kvmppc_booke_irqprio_deliver, and
> >>> dec stop-at-zero]
> >>>> Signed-off-by: Scott Wood <scottwood@freescale.com>
> >>>> ---
> >>>> arch/powerpc/include/asm/kvm_host.h | 2 +-
> >>>> arch/powerpc/include/asm/kvm_ppc.h | 11 +++++
> >>>> arch/powerpc/kvm/book3s.c | 8 ++++
> >>>> arch/powerpc/kvm/booke.c | 80
> >>> +++++++++++++++++++++++++++--------
> >>>> arch/powerpc/kvm/booke.h | 4 ++
> >>>> arch/powerpc/kvm/booke_emulate.c | 11 ++++-
> >>>> arch/powerpc/kvm/emulate.c | 45 ++++++++-----------
> >>>> arch/powerpc/kvm/powerpc.c | 20 +--------
> >>>> 8 files changed, 114 insertions(+), 67 deletions(-)
> >>>>
> >>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
> >>> b/arch/powerpc/include/asm/kvm_host.h
> >>>> index 3305af4..ea08c79 100644
> >>>> --- a/arch/powerpc/include/asm/kvm_host.h
> >>>> +++ b/arch/powerpc/include/asm/kvm_host.h
> >>>> @@ -334,7 +334,7 @@ struct kvm_vcpu_arch {
> >>>> u32 tbl;
> >>>> u32 tbu;
> >>>> u32 tcr;
> >>>> - u32 tsr;
> >>>> + ulong tsr; /* we need to perform set/clr_bits() which
> >>> requires ulong */
> >>>> u32 ivor[64];
> >>>> ulong ivpr;
> >>>> u32 pvr;
> >>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> >>> b/arch/powerpc/include/asm/kvm_ppc.h
> >>>> index bdaa6c8..ddaa615 100644
> >>>> --- a/arch/powerpc/include/asm/kvm_ppc.h
> >>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> >>>> @@ -66,6 +66,7 @@ extern int
> >>> kvmppc_emulate_instruction(struct kvm_run *run,
> >>>> extern int kvmppc_emulate_mmio(struct kvm_run *run, struct
> >>> kvm_vcpu *vcpu);
> >>>> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> >>>> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> >>>> +extern void kvmppc_decrementer_func(unsigned long data);
> >>>>
> >>>> /* Core-specific hooks */
> >>>>
> >>>> @@ -197,4 +198,14 @@ int kvm_vcpu_ioctl_config_tlb(struct
> >>> kvm_vcpu *vcpu,
> >>>> int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
> >>>> struct kvm_dirty_tlb *cfg);
> >>>>
> >>>> +static inline void kvmppc_wakeup_vcpu(struct kvm_vcpu *vcpu)
> >>>> +{
> >>>> + if (waitqueue_active(&vcpu->wq)) {
> >>>> + wake_up_interruptible(&vcpu->wq);
> >>>> + vcpu->stat.halt_wakeup++;
> >>>> + } else if (vcpu->cpu != -1) {
> >>>> + smp_send_reschedule(vcpu->cpu);
> >>>> + }
> >>>> +}
> >>>> +
> >>>> #endif /* __POWERPC_KVM_PPC_H__ */
> >>>> diff --git a/arch/powerpc/kvm/book3s.c
> b/arch/powerpc/kvm/book3s.c
> >>>> index f68a34d..b057856 100644
> >>>> --- a/arch/powerpc/kvm/book3s.c
> >>>> +++ b/arch/powerpc/kvm/book3s.c
> >>>> @@ -514,3 +514,11 @@ out:
> >>>> mutex_unlock(&kvm->slots_lock);
> >>>> return r;
> >>>> }
> >>>> +
> >>>> +void kvmppc_decrementer_func(unsigned long data)
> >>>> +{
> >>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> >>>> +
> >>>> + kvmppc_core_queue_dec(vcpu);
> >>>> + kvmppc_wakeup_vcpu(vcpu);
> >>>> +}
> >>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> >>>> index 0ed62c1..502f9ff 100644
> >>>> --- a/arch/powerpc/kvm/booke.c
> >>>> +++ b/arch/powerpc/kvm/booke.c
> >>>> @@ -205,7 +205,8 @@ void
> >>> kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> >>>> static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>> unsigned int priority)
> >>>> {
> >>>> - int allowed = 0;
> >>>> + int allowed = 1;
> >>>> + int dequeue = 0;
> >>>> ulong uninitialized_var(msr_mask);
> >>>> bool update_esr = false, update_dear = false;
> >>>> ulong crit_raw = vcpu->arch.shared->critical;
> >>>> @@ -258,10 +259,15 @@ static int
> >>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>> allowed = vcpu->arch.shared->msr & MSR_ME;
> >>>> msr_mask = 0;
> >>>> break;
> >>>> - case BOOKE_IRQPRIO_EXTERNAL:
> >>>> case BOOKE_IRQPRIO_DECREMENTER:
> >>>> + if (!(vcpu->arch.tcr & TCR_DIE)) {
> >>>> + allowed = 0;
> >>>> + dequeue = 1;
> >>>> + }
> >>>> + /* fall through */
> >>>> + case BOOKE_IRQPRIO_EXTERNAL:
> >>>> case BOOKE_IRQPRIO_FIT:
> >>>> - allowed = vcpu->arch.shared->msr & MSR_EE;
> >>>> + allowed = allowed && vcpu->arch.shared->msr & MSR_EE;
> >>>> allowed = allowed && !crit;
> >>>> msr_mask = MSR_CE|MSR_ME|MSR_DE;
> >>>> break;
> >>>> @@ -269,6 +275,8 @@ static int
> >>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>> allowed = vcpu->arch.shared->msr & MSR_DE;
> >>>> msr_mask = MSR_ME;
> >>>> break;
> >>>> + default:
> >>>> + allowed = 0;
> >>>> }
> >>>>
> >>>> if (allowed) {
> >>>> @@ -283,6 +291,9 @@ static int
> >>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>>
> >>>> if (!keep_irq)
> >>>> clear_bit(priority,
> >>> &vcpu->arch.pending_exceptions);
> >>>> + } else if (dequeue) {
> >>>> + /* Should only happen due to races with masking */
> >>>> + clear_bit(priority, &vcpu->arch.pending_exceptions);
> >>>> }
> >>>>
> >>>> return allowed;
> >>>> @@ -721,25 +732,16 @@ static int set_sregs_base(struct
> >>> kvm_vcpu *vcpu,
> >>>> vcpu->arch.shared->esr = sregs->u.e.esr;
> >>>> vcpu->arch.shared->dar = sregs->u.e.dear;
> >>>> vcpu->arch.vrsave = sregs->u.e.vrsave;
> >>>> - vcpu->arch.tcr = sregs->u.e.tcr;
> >>>> + kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
> >>>>
> >>>> - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
> >>>> + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
> >>>> vcpu->arch.dec = sregs->u.e.dec;
> >>>> -
> >>>> - kvmppc_emulate_dec(vcpu);
> >>>> + kvmppc_emulate_dec(vcpu);
> >>>> + }
> >>>>
> >>>> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> >>>> - /*
> >>>> - * FIXME: existing KVM timer handling is incomplete.
> >>>> - * TSR cannot be read by the guest, and its value in
> >>>> - * vcpu->arch is always zero. For now, just handle
> >>>> - * the case where the caller is trying to inject a
> >>>> - * decrementer interrupt.
> >>>> - */
> >>>> -
> >>>> - if ((sregs->u.e.tsr & TSR_DIS) &&
> >>>> - (vcpu->arch.tcr & TCR_DIE))
> >>>> - kvmppc_core_queue_dec(vcpu);
> >>>> + kvmppc_clr_tsr_bits(vcpu, ~0);
> >>>> + kvmppc_set_tsr_bits(vcpu, sregs->u.e.tsr);
> >>>> }
> >>>>
> >>>> return 0;
> >>>> @@ -895,6 +897,48 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
> >>>> {
> >>>> }
> >>>>
> >>>> +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
> >>>> +{
> >>>> + vcpu->arch.tcr = new_tcr;
> >>>> + smp_wmb();
> >>>> +
> >>>> + /*
> >>>> + * Since TCR changed, we need to check
> >>>> + * if blocked interrupts are deliverable.
> >>>> + */
> >>>> + if ((new_tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) {
> >>>> + kvmppc_core_queue_dec(vcpu);
> >>>> + kvmppc_wakeup_vcpu(vcpu);
> >>>> + }
> >>>> +}
> >>>> +
> >>>> +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
> >>>> +{
> >>>> + set_bits(tsr_bits, &vcpu->arch.tsr);
> >>>
> >>> I'm still missing the point why you need set_bits/clear_bits.
> >>> Can't you just use |= and &= ~ like everyone else? :)
> >>>
> >>
> >> Because set_bits/clear_bits is atomic.
> >> set/clr_tsr_bits() may be called in different threads so
> that on different cores.
> >> We need to make sure modification to be atomic to prevent
> missing changes.
> >
> > Ah, so you can set the TSR bit in the decrementer timer
> callback? Hrm. Please add a comment there saying so. All the
> other cases should be under vcpu_load() scope. If they're
> not, we have bugs in our code :). So they shouldn't need
> atomic operations (except for now they do because there's one
> user that doesn't do vcpu_load).
>
> Thinking about this for a bit more, shouldn't we be able to
> share the TSR with the guest so that it can clear the DEC
> interrupt without exiting the guest? Then we can't do ulong
> because that would make the pv abi 32/64 specific.
>
> Overall, I'm also not very sure it's too useful to set TSR
> from non-vcpu context. After all, in most cases the guest
> should have interrupts enabled at that point and probably
> wants to receive the interrupt in that moment, not some
> seconds later when the next intercept occurs. So we still
> want to kick the vcpu thread on dec interrupts. And then we
> don't need anything to be atomic.
>
We don't only set TSR for dec interrupt case.
In future, we would send out watchdog stuff which also set/clear TSR in non-vcpu context.
The watchdog bit of TSR is not only related to watchdog interrupt but maintains a simple statemachine.
Not set those states may confuse guest OS.
Thanks,
Yu
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (4 preceding siblings ...)
2011-09-07 10:16 ` Liu Yu-B13201
@ 2011-09-07 10:41 ` Alexander Graf
2011-09-08 2:20 ` Liu Yu-B13201
` (14 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Alexander Graf @ 2011-09-07 10:41 UTC (permalink / raw)
To: kvm-ppc
On 07.09.2011, at 12:16, Liu Yu-B13201 wrote:
>
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org
>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>> Sent: Wednesday, September 07, 2011 3:06 AM
>> To: Liu Yu-B13201
>> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org
>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
>> register emulation
>>
>>
>>
>> Am 06.09.2011 um 10:04 schrieb Alexander Graf <agraf@suse.de>:
>>
>>>
>>> On 06.09.2011, at 05:17, Liu Yu-B13201 wrote:
>>>
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>>> Sent: Tuesday, September 06, 2011 6:45 AM
>>>>> To: Wood Scott-B07421
>>>>> Cc: kvm-ppc@vger.kernel.org
>>>>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
>>>>> register emulation
>>>>>
>>>>>
>>>>> On 27.08.2011, at 01:31, Scott Wood wrote:
>>>>>
>>>>>> From: Liu Yu <yu.liu@freescale.com>
>>>>>>
>>>>>> Decrementers are now properly driven by TCR/TSR, and the guest
>>>>>> has full read/write access to these registers.
>>>>>>
>>>>>> The decrementer keeps ticking (and setting the TSR bit)
>>>>> regardless of
>>>>>> whether the interrupts are enabled with TCR.
>>>>>>
>>>>>> The decrementer stops at zero, rather than going negative.
>>>>>>
>>>>>> Signed-off-by: Liu Yu <yu.liu@freescale.com>
>>>>>> [scott: added dequeue in kvmppc_booke_irqprio_deliver, and
>>>>> dec stop-at-zero]
>>>>>> Signed-off-by: Scott Wood <scottwood@freescale.com>
>>>>>> ---
>>>>>> arch/powerpc/include/asm/kvm_host.h | 2 +-
>>>>>> arch/powerpc/include/asm/kvm_ppc.h | 11 +++++
>>>>>> arch/powerpc/kvm/book3s.c | 8 ++++
>>>>>> arch/powerpc/kvm/booke.c | 80
>>>>> +++++++++++++++++++++++++++--------
>>>>>> arch/powerpc/kvm/booke.h | 4 ++
>>>>>> arch/powerpc/kvm/booke_emulate.c | 11 ++++-
>>>>>> arch/powerpc/kvm/emulate.c | 45 ++++++++-----------
>>>>>> arch/powerpc/kvm/powerpc.c | 20 +--------
>>>>>> 8 files changed, 114 insertions(+), 67 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>>>>> b/arch/powerpc/include/asm/kvm_host.h
>>>>>> index 3305af4..ea08c79 100644
>>>>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>>>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>>>>> @@ -334,7 +334,7 @@ struct kvm_vcpu_arch {
>>>>>> u32 tbl;
>>>>>> u32 tbu;
>>>>>> u32 tcr;
>>>>>> - u32 tsr;
>>>>>> + ulong tsr; /* we need to perform set/clr_bits() which
>>>>> requires ulong */
>>>>>> u32 ivor[64];
>>>>>> ulong ivpr;
>>>>>> u32 pvr;
>>>>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
>>>>> b/arch/powerpc/include/asm/kvm_ppc.h
>>>>>> index bdaa6c8..ddaa615 100644
>>>>>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>>>>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>>>>>> @@ -66,6 +66,7 @@ extern int
>>>>> kvmppc_emulate_instruction(struct kvm_run *run,
>>>>>> extern int kvmppc_emulate_mmio(struct kvm_run *run, struct
>>>>> kvm_vcpu *vcpu);
>>>>>> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
>>>>>> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
>>>>>> +extern void kvmppc_decrementer_func(unsigned long data);
>>>>>>
>>>>>> /* Core-specific hooks */
>>>>>>
>>>>>> @@ -197,4 +198,14 @@ int kvm_vcpu_ioctl_config_tlb(struct
>>>>> kvm_vcpu *vcpu,
>>>>>> int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
>>>>>> struct kvm_dirty_tlb *cfg);
>>>>>>
>>>>>> +static inline void kvmppc_wakeup_vcpu(struct kvm_vcpu *vcpu)
>>>>>> +{
>>>>>> + if (waitqueue_active(&vcpu->wq)) {
>>>>>> + wake_up_interruptible(&vcpu->wq);
>>>>>> + vcpu->stat.halt_wakeup++;
>>>>>> + } else if (vcpu->cpu != -1) {
>>>>>> + smp_send_reschedule(vcpu->cpu);
>>>>>> + }
>>>>>> +}
>>>>>> +
>>>>>> #endif /* __POWERPC_KVM_PPC_H__ */
>>>>>> diff --git a/arch/powerpc/kvm/book3s.c
>> b/arch/powerpc/kvm/book3s.c
>>>>>> index f68a34d..b057856 100644
>>>>>> --- a/arch/powerpc/kvm/book3s.c
>>>>>> +++ b/arch/powerpc/kvm/book3s.c
>>>>>> @@ -514,3 +514,11 @@ out:
>>>>>> mutex_unlock(&kvm->slots_lock);
>>>>>> return r;
>>>>>> }
>>>>>> +
>>>>>> +void kvmppc_decrementer_func(unsigned long data)
>>>>>> +{
>>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
>>>>>> +
>>>>>> + kvmppc_core_queue_dec(vcpu);
>>>>>> + kvmppc_wakeup_vcpu(vcpu);
>>>>>> +}
>>>>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>>>>> index 0ed62c1..502f9ff 100644
>>>>>> --- a/arch/powerpc/kvm/booke.c
>>>>>> +++ b/arch/powerpc/kvm/booke.c
>>>>>> @@ -205,7 +205,8 @@ void
>>>>> kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
>>>>>> static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>>>>> unsigned int priority)
>>>>>> {
>>>>>> - int allowed = 0;
>>>>>> + int allowed = 1;
>>>>>> + int dequeue = 0;
>>>>>> ulong uninitialized_var(msr_mask);
>>>>>> bool update_esr = false, update_dear = false;
>>>>>> ulong crit_raw = vcpu->arch.shared->critical;
>>>>>> @@ -258,10 +259,15 @@ static int
>>>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>>>>> allowed = vcpu->arch.shared->msr & MSR_ME;
>>>>>> msr_mask = 0;
>>>>>> break;
>>>>>> - case BOOKE_IRQPRIO_EXTERNAL:
>>>>>> case BOOKE_IRQPRIO_DECREMENTER:
>>>>>> + if (!(vcpu->arch.tcr & TCR_DIE)) {
>>>>>> + allowed = 0;
>>>>>> + dequeue = 1;
>>>>>> + }
>>>>>> + /* fall through */
>>>>>> + case BOOKE_IRQPRIO_EXTERNAL:
>>>>>> case BOOKE_IRQPRIO_FIT:
>>>>>> - allowed = vcpu->arch.shared->msr & MSR_EE;
>>>>>> + allowed = allowed && vcpu->arch.shared->msr & MSR_EE;
>>>>>> allowed = allowed && !crit;
>>>>>> msr_mask = MSR_CE|MSR_ME|MSR_DE;
>>>>>> break;
>>>>>> @@ -269,6 +275,8 @@ static int
>>>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>>>>> allowed = vcpu->arch.shared->msr & MSR_DE;
>>>>>> msr_mask = MSR_ME;
>>>>>> break;
>>>>>> + default:
>>>>>> + allowed = 0;
>>>>>> }
>>>>>>
>>>>>> if (allowed) {
>>>>>> @@ -283,6 +291,9 @@ static int
>>>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
>>>>>>
>>>>>> if (!keep_irq)
>>>>>> clear_bit(priority,
>>>>> &vcpu->arch.pending_exceptions);
>>>>>> + } else if (dequeue) {
>>>>>> + /* Should only happen due to races with masking */
>>>>>> + clear_bit(priority, &vcpu->arch.pending_exceptions);
>>>>>> }
>>>>>>
>>>>>> return allowed;
>>>>>> @@ -721,25 +732,16 @@ static int set_sregs_base(struct
>>>>> kvm_vcpu *vcpu,
>>>>>> vcpu->arch.shared->esr = sregs->u.e.esr;
>>>>>> vcpu->arch.shared->dar = sregs->u.e.dear;
>>>>>> vcpu->arch.vrsave = sregs->u.e.vrsave;
>>>>>> - vcpu->arch.tcr = sregs->u.e.tcr;
>>>>>> + kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
>>>>>>
>>>>>> - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
>>>>>> + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
>>>>>> vcpu->arch.dec = sregs->u.e.dec;
>>>>>> -
>>>>>> - kvmppc_emulate_dec(vcpu);
>>>>>> + kvmppc_emulate_dec(vcpu);
>>>>>> + }
>>>>>>
>>>>>> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
>>>>>> - /*
>>>>>> - * FIXME: existing KVM timer handling is incomplete.
>>>>>> - * TSR cannot be read by the guest, and its value in
>>>>>> - * vcpu->arch is always zero. For now, just handle
>>>>>> - * the case where the caller is trying to inject a
>>>>>> - * decrementer interrupt.
>>>>>> - */
>>>>>> -
>>>>>> - if ((sregs->u.e.tsr & TSR_DIS) &&
>>>>>> - (vcpu->arch.tcr & TCR_DIE))
>>>>>> - kvmppc_core_queue_dec(vcpu);
>>>>>> + kvmppc_clr_tsr_bits(vcpu, ~0);
>>>>>> + kvmppc_set_tsr_bits(vcpu, sregs->u.e.tsr);
>>>>>> }
>>>>>>
>>>>>> return 0;
>>>>>> @@ -895,6 +897,48 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
>>>>>> {
>>>>>> }
>>>>>>
>>>>>> +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
>>>>>> +{
>>>>>> + vcpu->arch.tcr = new_tcr;
>>>>>> + smp_wmb();
>>>>>> +
>>>>>> + /*
>>>>>> + * Since TCR changed, we need to check
>>>>>> + * if blocked interrupts are deliverable.
>>>>>> + */
>>>>>> + if ((new_tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) {
>>>>>> + kvmppc_core_queue_dec(vcpu);
>>>>>> + kvmppc_wakeup_vcpu(vcpu);
>>>>>> + }
>>>>>> +}
>>>>>> +
>>>>>> +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
>>>>>> +{
>>>>>> + set_bits(tsr_bits, &vcpu->arch.tsr);
>>>>>
>>>>> I'm still missing the point why you need set_bits/clear_bits.
>>>>> Can't you just use |= and &= ~ like everyone else? :)
>>>>>
>>>>
>>>> Because set_bits/clear_bits is atomic.
>>>> set/clr_tsr_bits() may be called in different threads so
>> that on different cores.
>>>> We need to make sure modification to be atomic to prevent
>> missing changes.
>>>
>>> Ah, so you can set the TSR bit in the decrementer timer
>> callback? Hrm. Please add a comment there saying so. All the
>> other cases should be under vcpu_load() scope. If they're
>> not, we have bugs in our code :). So they shouldn't need
>> atomic operations (except for now they do because there's one
>> user that doesn't do vcpu_load).
>>
>> Thinking about this for a bit more, shouldn't we be able to
>> share the TSR with the guest so that it can clear the DEC
>> interrupt without exiting the guest? Then we can't do ulong
>> because that would make the pv abi 32/64 specific.
>>
>> Overall, I'm also not very sure it's too useful to set TSR
>> from non-vcpu context. After all, in most cases the guest
>> should have interrupts enabled at that point and probably
>> wants to receive the interrupt in that moment, not some
>> seconds later when the next intercept occurs. So we still
>> want to kick the vcpu thread on dec interrupts. And then we
>> don't need anything to be atomic.
>>
>
> We don't only set TSR for dec interrupt case.
> In future, we would send out watchdog stuff which also set/clear TSR in non-vcpu context.
> The watchdog bit of TSR is not only related to watchdog interrupt but maintains a simple statemachine.
> Not set those states may confuse guest OS.
Yes, but why can't we do this in the vcpu thread's context so we only ever have a single instance accessing the vcpu struct? It makes a lot of things a lot easier. And for the watchdog, we don't have to set the bit unless it's clean, right? So we don't add any overhead to the vcpu unless it's actually cleaning the watchdog TSR bit.
Alex
^ permalink raw reply [flat|nested] 22+ messages in thread* RE: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (5 preceding siblings ...)
2011-09-07 10:41 ` Alexander Graf
@ 2011-09-08 2:20 ` Liu Yu-B13201
2011-09-08 9:21 ` Liu Yu-B13201
` (13 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Liu Yu-B13201 @ 2011-09-08 2:20 UTC (permalink / raw)
To: kvm-ppc
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Wednesday, September 07, 2011 6:42 PM
> To: Liu Yu-B13201
> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
> register emulation
>
>
> On 07.09.2011, at 12:16, Liu Yu-B13201 wrote:
>
> >
> >
> >> -----Original Message-----
> >> From: kvm-ppc-owner@vger.kernel.org
> >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> >> Sent: Wednesday, September 07, 2011 3:06 AM
> >> To: Liu Yu-B13201
> >> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org
> >> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
> >> register emulation
> >>
> >>
> >>
> >> Am 06.09.2011 um 10:04 schrieb Alexander Graf <agraf@suse.de>:
> >>
> >>>
> >>> On 06.09.2011, at 05:17, Liu Yu-B13201 wrote:
> >>>
> >>>>
> >>>>
> >>>>> -----Original Message-----
> >>>>> From: kvm-ppc-owner@vger.kernel.org
> >>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of
> Alexander Graf
> >>>>> Sent: Tuesday, September 06, 2011 6:45 AM
> >>>>> To: Wood Scott-B07421
> >>>>> Cc: kvm-ppc@vger.kernel.org
> >>>>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
> >>>>> register emulation
> >>>>>
> >>>>>
> >>>>> On 27.08.2011, at 01:31, Scott Wood wrote:
> >>>>>
> >>>>>> From: Liu Yu <yu.liu@freescale.com>
> >>>>>>
> >>>>>> Decrementers are now properly driven by TCR/TSR, and the guest
> >>>>>> has full read/write access to these registers.
> >>>>>>
> >>>>>> The decrementer keeps ticking (and setting the TSR bit)
> >>>>> regardless of
> >>>>>> whether the interrupts are enabled with TCR.
> >>>>>>
> >>>>>> The decrementer stops at zero, rather than going negative.
> >>>>>>
> >>>>>> Signed-off-by: Liu Yu <yu.liu@freescale.com>
> >>>>>> [scott: added dequeue in kvmppc_booke_irqprio_deliver, and
> >>>>> dec stop-at-zero]
> >>>>>> Signed-off-by: Scott Wood <scottwood@freescale.com>
> >>>>>> ---
> >>>>>> arch/powerpc/include/asm/kvm_host.h | 2 +-
> >>>>>> arch/powerpc/include/asm/kvm_ppc.h | 11 +++++
> >>>>>> arch/powerpc/kvm/book3s.c | 8 ++++
> >>>>>> arch/powerpc/kvm/booke.c | 80
> >>>>> +++++++++++++++++++++++++++--------
> >>>>>> arch/powerpc/kvm/booke.h | 4 ++
> >>>>>> arch/powerpc/kvm/booke_emulate.c | 11 ++++-
> >>>>>> arch/powerpc/kvm/emulate.c | 45 ++++++++-----------
> >>>>>> arch/powerpc/kvm/powerpc.c | 20 +--------
> >>>>>> 8 files changed, 114 insertions(+), 67 deletions(-)
> >>>>>>
> >>>>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
> >>>>> b/arch/powerpc/include/asm/kvm_host.h
> >>>>>> index 3305af4..ea08c79 100644
> >>>>>> --- a/arch/powerpc/include/asm/kvm_host.h
> >>>>>> +++ b/arch/powerpc/include/asm/kvm_host.h
> >>>>>> @@ -334,7 +334,7 @@ struct kvm_vcpu_arch {
> >>>>>> u32 tbl;
> >>>>>> u32 tbu;
> >>>>>> u32 tcr;
> >>>>>> - u32 tsr;
> >>>>>> + ulong tsr; /* we need to perform set/clr_bits() which
> >>>>> requires ulong */
> >>>>>> u32 ivor[64];
> >>>>>> ulong ivpr;
> >>>>>> u32 pvr;
> >>>>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> >>>>> b/arch/powerpc/include/asm/kvm_ppc.h
> >>>>>> index bdaa6c8..ddaa615 100644
> >>>>>> --- a/arch/powerpc/include/asm/kvm_ppc.h
> >>>>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> >>>>>> @@ -66,6 +66,7 @@ extern int
> >>>>> kvmppc_emulate_instruction(struct kvm_run *run,
> >>>>>> extern int kvmppc_emulate_mmio(struct kvm_run *run, struct
> >>>>> kvm_vcpu *vcpu);
> >>>>>> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> >>>>>> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> >>>>>> +extern void kvmppc_decrementer_func(unsigned long data);
> >>>>>>
> >>>>>> /* Core-specific hooks */
> >>>>>>
> >>>>>> @@ -197,4 +198,14 @@ int kvm_vcpu_ioctl_config_tlb(struct
> >>>>> kvm_vcpu *vcpu,
> >>>>>> int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
> >>>>>> struct kvm_dirty_tlb *cfg);
> >>>>>>
> >>>>>> +static inline void kvmppc_wakeup_vcpu(struct kvm_vcpu *vcpu)
> >>>>>> +{
> >>>>>> + if (waitqueue_active(&vcpu->wq)) {
> >>>>>> + wake_up_interruptible(&vcpu->wq);
> >>>>>> + vcpu->stat.halt_wakeup++;
> >>>>>> + } else if (vcpu->cpu != -1) {
> >>>>>> + smp_send_reschedule(vcpu->cpu);
> >>>>>> + }
> >>>>>> +}
> >>>>>> +
> >>>>>> #endif /* __POWERPC_KVM_PPC_H__ */
> >>>>>> diff --git a/arch/powerpc/kvm/book3s.c
> >> b/arch/powerpc/kvm/book3s.c
> >>>>>> index f68a34d..b057856 100644
> >>>>>> --- a/arch/powerpc/kvm/book3s.c
> >>>>>> +++ b/arch/powerpc/kvm/book3s.c
> >>>>>> @@ -514,3 +514,11 @@ out:
> >>>>>> mutex_unlock(&kvm->slots_lock);
> >>>>>> return r;
> >>>>>> }
> >>>>>> +
> >>>>>> +void kvmppc_decrementer_func(unsigned long data)
> >>>>>> +{
> >>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> >>>>>> +
> >>>>>> + kvmppc_core_queue_dec(vcpu);
> >>>>>> + kvmppc_wakeup_vcpu(vcpu);
> >>>>>> +}
> >>>>>> diff --git a/arch/powerpc/kvm/booke.c
> b/arch/powerpc/kvm/booke.c
> >>>>>> index 0ed62c1..502f9ff 100644
> >>>>>> --- a/arch/powerpc/kvm/booke.c
> >>>>>> +++ b/arch/powerpc/kvm/booke.c
> >>>>>> @@ -205,7 +205,8 @@ void
> >>>>> kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> >>>>>> static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>>>> unsigned int priority)
> >>>>>> {
> >>>>>> - int allowed = 0;
> >>>>>> + int allowed = 1;
> >>>>>> + int dequeue = 0;
> >>>>>> ulong uninitialized_var(msr_mask);
> >>>>>> bool update_esr = false, update_dear = false;
> >>>>>> ulong crit_raw = vcpu->arch.shared->critical;
> >>>>>> @@ -258,10 +259,15 @@ static int
> >>>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>>>> allowed = vcpu->arch.shared->msr & MSR_ME;
> >>>>>> msr_mask = 0;
> >>>>>> break;
> >>>>>> - case BOOKE_IRQPRIO_EXTERNAL:
> >>>>>> case BOOKE_IRQPRIO_DECREMENTER:
> >>>>>> + if (!(vcpu->arch.tcr & TCR_DIE)) {
> >>>>>> + allowed = 0;
> >>>>>> + dequeue = 1;
> >>>>>> + }
> >>>>>> + /* fall through */
> >>>>>> + case BOOKE_IRQPRIO_EXTERNAL:
> >>>>>> case BOOKE_IRQPRIO_FIT:
> >>>>>> - allowed = vcpu->arch.shared->msr & MSR_EE;
> >>>>>> + allowed = allowed && vcpu->arch.shared->msr & MSR_EE;
> >>>>>> allowed = allowed && !crit;
> >>>>>> msr_mask = MSR_CE|MSR_ME|MSR_DE;
> >>>>>> break;
> >>>>>> @@ -269,6 +275,8 @@ static int
> >>>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>>>> allowed = vcpu->arch.shared->msr & MSR_DE;
> >>>>>> msr_mask = MSR_ME;
> >>>>>> break;
> >>>>>> + default:
> >>>>>> + allowed = 0;
> >>>>>> }
> >>>>>>
> >>>>>> if (allowed) {
> >>>>>> @@ -283,6 +291,9 @@ static int
> >>>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>>>>
> >>>>>> if (!keep_irq)
> >>>>>> clear_bit(priority,
> >>>>> &vcpu->arch.pending_exceptions);
> >>>>>> + } else if (dequeue) {
> >>>>>> + /* Should only happen due to races with masking */
> >>>>>> + clear_bit(priority, &vcpu->arch.pending_exceptions);
> >>>>>> }
> >>>>>>
> >>>>>> return allowed;
> >>>>>> @@ -721,25 +732,16 @@ static int set_sregs_base(struct
> >>>>> kvm_vcpu *vcpu,
> >>>>>> vcpu->arch.shared->esr = sregs->u.e.esr;
> >>>>>> vcpu->arch.shared->dar = sregs->u.e.dear;
> >>>>>> vcpu->arch.vrsave = sregs->u.e.vrsave;
> >>>>>> - vcpu->arch.tcr = sregs->u.e.tcr;
> >>>>>> + kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
> >>>>>>
> >>>>>> - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
> >>>>>> + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
> >>>>>> vcpu->arch.dec = sregs->u.e.dec;
> >>>>>> -
> >>>>>> - kvmppc_emulate_dec(vcpu);
> >>>>>> + kvmppc_emulate_dec(vcpu);
> >>>>>> + }
> >>>>>>
> >>>>>> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> >>>>>> - /*
> >>>>>> - * FIXME: existing KVM timer handling is incomplete.
> >>>>>> - * TSR cannot be read by the guest, and its value in
> >>>>>> - * vcpu->arch is always zero. For now, just handle
> >>>>>> - * the case where the caller is trying to inject a
> >>>>>> - * decrementer interrupt.
> >>>>>> - */
> >>>>>> -
> >>>>>> - if ((sregs->u.e.tsr & TSR_DIS) &&
> >>>>>> - (vcpu->arch.tcr & TCR_DIE))
> >>>>>> - kvmppc_core_queue_dec(vcpu);
> >>>>>> + kvmppc_clr_tsr_bits(vcpu, ~0);
> >>>>>> + kvmppc_set_tsr_bits(vcpu, sregs->u.e.tsr);
> >>>>>> }
> >>>>>>
> >>>>>> return 0;
> >>>>>> @@ -895,6 +897,48 @@ void
> kvmppc_core_destroy_vm(struct kvm *kvm)
> >>>>>> {
> >>>>>> }
> >>>>>>
> >>>>>> +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
> >>>>>> +{
> >>>>>> + vcpu->arch.tcr = new_tcr;
> >>>>>> + smp_wmb();
> >>>>>> +
> >>>>>> + /*
> >>>>>> + * Since TCR changed, we need to check
> >>>>>> + * if blocked interrupts are deliverable.
> >>>>>> + */
> >>>>>> + if ((new_tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) {
> >>>>>> + kvmppc_core_queue_dec(vcpu);
> >>>>>> + kvmppc_wakeup_vcpu(vcpu);
> >>>>>> + }
> >>>>>> +}
> >>>>>> +
> >>>>>> +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
> >>>>>> +{
> >>>>>> + set_bits(tsr_bits, &vcpu->arch.tsr);
> >>>>>
> >>>>> I'm still missing the point why you need set_bits/clear_bits.
> >>>>> Can't you just use |= and &= ~ like everyone else? :)
> >>>>>
> >>>>
> >>>> Because set_bits/clear_bits is atomic.
> >>>> set/clr_tsr_bits() may be called in different threads so
> >> that on different cores.
> >>>> We need to make sure modification to be atomic to prevent
> >> missing changes.
> >>>
> >>> Ah, so you can set the TSR bit in the decrementer timer
> >> callback? Hrm. Please add a comment there saying so. All the
> >> other cases should be under vcpu_load() scope. If they're
> >> not, we have bugs in our code :). So they shouldn't need
> >> atomic operations (except for now they do because there's one
> >> user that doesn't do vcpu_load).
> >>
> >> Thinking about this for a bit more, shouldn't we be able to
> >> share the TSR with the guest so that it can clear the DEC
> >> interrupt without exiting the guest? Then we can't do ulong
> >> because that would make the pv abi 32/64 specific.
> >>
> >> Overall, I'm also not very sure it's too useful to set TSR
> >> from non-vcpu context. After all, in most cases the guest
> >> should have interrupts enabled at that point and probably
> >> wants to receive the interrupt in that moment, not some
> >> seconds later when the next intercept occurs. So we still
> >> want to kick the vcpu thread on dec interrupts. And then we
> >> don't need anything to be atomic.
> >>
> >
> > We don't only set TSR for dec interrupt case.
> > In future, we would send out watchdog stuff which also
> set/clear TSR in non-vcpu context.
> > The watchdog bit of TSR is not only related to watchdog
> interrupt but maintains a simple statemachine.
> > Not set those states may confuse guest OS.
>
> Yes, but why can't we do this in the vcpu thread's context so
> we only ever have a single instance accessing the vcpu
> struct? It makes a lot of things a lot easier. And for the
> watchdog, we don't have to set the bit unless it's clean,
> right? So we don't add any overhead to the vcpu unless it's
> actually cleaning the watchdog TSR bit.
>
Yes. We can try lazy update. :)
Thanks,
Yu
^ permalink raw reply [flat|nested] 22+ messages in thread* RE: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (6 preceding siblings ...)
2011-09-08 2:20 ` Liu Yu-B13201
@ 2011-09-08 9:21 ` Liu Yu-B13201
2011-09-08 15:34 ` Scott Wood
` (12 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Liu Yu-B13201 @ 2011-09-08 9:21 UTC (permalink / raw)
To: kvm-ppc
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Wednesday, September 07, 2011 6:42 PM
> To: Liu Yu-B13201
> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
> register emulation
>
>
> On 07.09.2011, at 12:16, Liu Yu-B13201 wrote:
>
> >
> >
> >> -----Original Message-----
> >> From: kvm-ppc-owner@vger.kernel.org
> >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> >> Sent: Wednesday, September 07, 2011 3:06 AM
> >> To: Liu Yu-B13201
> >> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org
> >> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
> >> register emulation
> >>
> >>
> >>
> >> Am 06.09.2011 um 10:04 schrieb Alexander Graf <agraf@suse.de>:
> >>
> >>>
> >>> On 06.09.2011, at 05:17, Liu Yu-B13201 wrote:
> >>>
> >>>>
> >>>>
> >>>>> -----Original Message-----
> >>>>> From: kvm-ppc-owner@vger.kernel.org
> >>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of
> Alexander Graf
> >>>>> Sent: Tuesday, September 06, 2011 6:45 AM
> >>>>> To: Wood Scott-B07421
> >>>>> Cc: kvm-ppc@vger.kernel.org
> >>>>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer
> >>>>> register emulation
> >>>>>
> >>>>>
> >>>>> On 27.08.2011, at 01:31, Scott Wood wrote:
> >>>>>
> >>>>>> From: Liu Yu <yu.liu@freescale.com>
> >>>>>>
> >>>>>> Decrementers are now properly driven by TCR/TSR, and the guest
> >>>>>> has full read/write access to these registers.
> >>>>>>
> >>>>>> The decrementer keeps ticking (and setting the TSR bit)
> >>>>> regardless of
> >>>>>> whether the interrupts are enabled with TCR.
> >>>>>>
> >>>>>> The decrementer stops at zero, rather than going negative.
> >>>>>>
> >>>>>> Signed-off-by: Liu Yu <yu.liu@freescale.com>
> >>>>>> [scott: added dequeue in kvmppc_booke_irqprio_deliver, and
> >>>>> dec stop-at-zero]
> >>>>>> Signed-off-by: Scott Wood <scottwood@freescale.com>
> >>>>>> ---
> >>>>>> arch/powerpc/include/asm/kvm_host.h | 2 +-
> >>>>>> arch/powerpc/include/asm/kvm_ppc.h | 11 +++++
> >>>>>> arch/powerpc/kvm/book3s.c | 8 ++++
> >>>>>> arch/powerpc/kvm/booke.c | 80
> >>>>> +++++++++++++++++++++++++++--------
> >>>>>> arch/powerpc/kvm/booke.h | 4 ++
> >>>>>> arch/powerpc/kvm/booke_emulate.c | 11 ++++-
> >>>>>> arch/powerpc/kvm/emulate.c | 45 ++++++++-----------
> >>>>>> arch/powerpc/kvm/powerpc.c | 20 +--------
> >>>>>> 8 files changed, 114 insertions(+), 67 deletions(-)
> >>>>>>
> >>>>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
> >>>>> b/arch/powerpc/include/asm/kvm_host.h
> >>>>>> index 3305af4..ea08c79 100644
> >>>>>> --- a/arch/powerpc/include/asm/kvm_host.h
> >>>>>> +++ b/arch/powerpc/include/asm/kvm_host.h
> >>>>>> @@ -334,7 +334,7 @@ struct kvm_vcpu_arch {
> >>>>>> u32 tbl;
> >>>>>> u32 tbu;
> >>>>>> u32 tcr;
> >>>>>> - u32 tsr;
> >>>>>> + ulong tsr; /* we need to perform set/clr_bits() which
> >>>>> requires ulong */
> >>>>>> u32 ivor[64];
> >>>>>> ulong ivpr;
> >>>>>> u32 pvr;
> >>>>>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> >>>>> b/arch/powerpc/include/asm/kvm_ppc.h
> >>>>>> index bdaa6c8..ddaa615 100644
> >>>>>> --- a/arch/powerpc/include/asm/kvm_ppc.h
> >>>>>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> >>>>>> @@ -66,6 +66,7 @@ extern int
> >>>>> kvmppc_emulate_instruction(struct kvm_run *run,
> >>>>>> extern int kvmppc_emulate_mmio(struct kvm_run *run, struct
> >>>>> kvm_vcpu *vcpu);
> >>>>>> extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> >>>>>> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> >>>>>> +extern void kvmppc_decrementer_func(unsigned long data);
> >>>>>>
> >>>>>> /* Core-specific hooks */
> >>>>>>
> >>>>>> @@ -197,4 +198,14 @@ int kvm_vcpu_ioctl_config_tlb(struct
> >>>>> kvm_vcpu *vcpu,
> >>>>>> int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
> >>>>>> struct kvm_dirty_tlb *cfg);
> >>>>>>
> >>>>>> +static inline void kvmppc_wakeup_vcpu(struct kvm_vcpu *vcpu)
> >>>>>> +{
> >>>>>> + if (waitqueue_active(&vcpu->wq)) {
> >>>>>> + wake_up_interruptible(&vcpu->wq);
> >>>>>> + vcpu->stat.halt_wakeup++;
> >>>>>> + } else if (vcpu->cpu != -1) {
> >>>>>> + smp_send_reschedule(vcpu->cpu);
> >>>>>> + }
> >>>>>> +}
> >>>>>> +
> >>>>>> #endif /* __POWERPC_KVM_PPC_H__ */
> >>>>>> diff --git a/arch/powerpc/kvm/book3s.c
> >> b/arch/powerpc/kvm/book3s.c
> >>>>>> index f68a34d..b057856 100644
> >>>>>> --- a/arch/powerpc/kvm/book3s.c
> >>>>>> +++ b/arch/powerpc/kvm/book3s.c
> >>>>>> @@ -514,3 +514,11 @@ out:
> >>>>>> mutex_unlock(&kvm->slots_lock);
> >>>>>> return r;
> >>>>>> }
> >>>>>> +
> >>>>>> +void kvmppc_decrementer_func(unsigned long data)
> >>>>>> +{
> >>>>>> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> >>>>>> +
> >>>>>> + kvmppc_core_queue_dec(vcpu);
> >>>>>> + kvmppc_wakeup_vcpu(vcpu);
> >>>>>> +}
> >>>>>> diff --git a/arch/powerpc/kvm/booke.c
> b/arch/powerpc/kvm/booke.c
> >>>>>> index 0ed62c1..502f9ff 100644
> >>>>>> --- a/arch/powerpc/kvm/booke.c
> >>>>>> +++ b/arch/powerpc/kvm/booke.c
> >>>>>> @@ -205,7 +205,8 @@ void
> >>>>> kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> >>>>>> static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>>>> unsigned int priority)
> >>>>>> {
> >>>>>> - int allowed = 0;
> >>>>>> + int allowed = 1;
> >>>>>> + int dequeue = 0;
> >>>>>> ulong uninitialized_var(msr_mask);
> >>>>>> bool update_esr = false, update_dear = false;
> >>>>>> ulong crit_raw = vcpu->arch.shared->critical;
> >>>>>> @@ -258,10 +259,15 @@ static int
> >>>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>>>> allowed = vcpu->arch.shared->msr & MSR_ME;
> >>>>>> msr_mask = 0;
> >>>>>> break;
> >>>>>> - case BOOKE_IRQPRIO_EXTERNAL:
> >>>>>> case BOOKE_IRQPRIO_DECREMENTER:
> >>>>>> + if (!(vcpu->arch.tcr & TCR_DIE)) {
> >>>>>> + allowed = 0;
> >>>>>> + dequeue = 1;
> >>>>>> + }
> >>>>>> + /* fall through */
> >>>>>> + case BOOKE_IRQPRIO_EXTERNAL:
> >>>>>> case BOOKE_IRQPRIO_FIT:
> >>>>>> - allowed = vcpu->arch.shared->msr & MSR_EE;
> >>>>>> + allowed = allowed && vcpu->arch.shared->msr & MSR_EE;
> >>>>>> allowed = allowed && !crit;
> >>>>>> msr_mask = MSR_CE|MSR_ME|MSR_DE;
> >>>>>> break;
> >>>>>> @@ -269,6 +275,8 @@ static int
> >>>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>>>> allowed = vcpu->arch.shared->msr & MSR_DE;
> >>>>>> msr_mask = MSR_ME;
> >>>>>> break;
> >>>>>> + default:
> >>>>>> + allowed = 0;
> >>>>>> }
> >>>>>>
> >>>>>> if (allowed) {
> >>>>>> @@ -283,6 +291,9 @@ static int
> >>>>> kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
> >>>>>>
> >>>>>> if (!keep_irq)
> >>>>>> clear_bit(priority,
> >>>>> &vcpu->arch.pending_exceptions);
> >>>>>> + } else if (dequeue) {
> >>>>>> + /* Should only happen due to races with masking */
> >>>>>> + clear_bit(priority, &vcpu->arch.pending_exceptions);
> >>>>>> }
> >>>>>>
> >>>>>> return allowed;
> >>>>>> @@ -721,25 +732,16 @@ static int set_sregs_base(struct
> >>>>> kvm_vcpu *vcpu,
> >>>>>> vcpu->arch.shared->esr = sregs->u.e.esr;
> >>>>>> vcpu->arch.shared->dar = sregs->u.e.dear;
> >>>>>> vcpu->arch.vrsave = sregs->u.e.vrsave;
> >>>>>> - vcpu->arch.tcr = sregs->u.e.tcr;
> >>>>>> + kvmppc_set_tcr(vcpu, sregs->u.e.tcr);
> >>>>>>
> >>>>>> - if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC)
> >>>>>> + if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_DEC) {
> >>>>>> vcpu->arch.dec = sregs->u.e.dec;
> >>>>>> -
> >>>>>> - kvmppc_emulate_dec(vcpu);
> >>>>>> + kvmppc_emulate_dec(vcpu);
> >>>>>> + }
> >>>>>>
> >>>>>> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> >>>>>> - /*
> >>>>>> - * FIXME: existing KVM timer handling is incomplete.
> >>>>>> - * TSR cannot be read by the guest, and its value in
> >>>>>> - * vcpu->arch is always zero. For now, just handle
> >>>>>> - * the case where the caller is trying to inject a
> >>>>>> - * decrementer interrupt.
> >>>>>> - */
> >>>>>> -
> >>>>>> - if ((sregs->u.e.tsr & TSR_DIS) &&
> >>>>>> - (vcpu->arch.tcr & TCR_DIE))
> >>>>>> - kvmppc_core_queue_dec(vcpu);
> >>>>>> + kvmppc_clr_tsr_bits(vcpu, ~0);
> >>>>>> + kvmppc_set_tsr_bits(vcpu, sregs->u.e.tsr);
> >>>>>> }
> >>>>>>
> >>>>>> return 0;
> >>>>>> @@ -895,6 +897,48 @@ void
> kvmppc_core_destroy_vm(struct kvm *kvm)
> >>>>>> {
> >>>>>> }
> >>>>>>
> >>>>>> +void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
> >>>>>> +{
> >>>>>> + vcpu->arch.tcr = new_tcr;
> >>>>>> + smp_wmb();
> >>>>>> +
> >>>>>> + /*
> >>>>>> + * Since TCR changed, we need to check
> >>>>>> + * if blocked interrupts are deliverable.
> >>>>>> + */
> >>>>>> + if ((new_tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) {
> >>>>>> + kvmppc_core_queue_dec(vcpu);
> >>>>>> + kvmppc_wakeup_vcpu(vcpu);
> >>>>>> + }
> >>>>>> +}
> >>>>>> +
> >>>>>> +void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
> >>>>>> +{
> >>>>>> + set_bits(tsr_bits, &vcpu->arch.tsr);
> >>>>>
> >>>>> I'm still missing the point why you need set_bits/clear_bits.
> >>>>> Can't you just use |= and &= ~ like everyone else? :)
> >>>>>
> >>>>
> >>>> Because set_bits/clear_bits is atomic.
> >>>> set/clr_tsr_bits() may be called in different threads so
> >> that on different cores.
> >>>> We need to make sure modification to be atomic to prevent
> >> missing changes.
> >>>
> >>> Ah, so you can set the TSR bit in the decrementer timer
> >> callback? Hrm. Please add a comment there saying so. All the
> >> other cases should be under vcpu_load() scope. If they're
> >> not, we have bugs in our code :). So they shouldn't need
> >> atomic operations (except for now they do because there's one
> >> user that doesn't do vcpu_load).
> >>
> >> Thinking about this for a bit more, shouldn't we be able to
> >> share the TSR with the guest so that it can clear the DEC
> >> interrupt without exiting the guest? Then we can't do ulong
> >> because that would make the pv abi 32/64 specific.
> >>
> >> Overall, I'm also not very sure it's too useful to set TSR
> >> from non-vcpu context. After all, in most cases the guest
> >> should have interrupts enabled at that point and probably
> >> wants to receive the interrupt in that moment, not some
> >> seconds later when the next intercept occurs. So we still
> >> want to kick the vcpu thread on dec interrupts. And then we
> >> don't need anything to be atomic.
> >>
> >
> > We don't only set TSR for dec interrupt case.
> > In future, we would send out watchdog stuff which also
> set/clear TSR in non-vcpu context.
> > The watchdog bit of TSR is not only related to watchdog
> interrupt but maintains a simple statemachine.
> > Not set those states may confuse guest OS.
>
> Yes, but why can't we do this in the vcpu thread's context so
> we only ever have a single instance accessing the vcpu
> struct? It makes a lot of things a lot easier. And for the
> watchdog, we don't have to set the bit unless it's clean,
> right? So we don't add any overhead to the vcpu unless it's
> actually cleaning the watchdog TSR bit.
>
Hmm, just want to make it clear.
If we want to do the TSR updates in vcpu thread, we need to record all the TSR update requests
And when guest access TSR, we update TSR according to those requests.
Doesn't this sound somewhat more complicated and harder to read than atomic way?
Thanks,
Yu
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (7 preceding siblings ...)
2011-09-08 9:21 ` Liu Yu-B13201
@ 2011-09-08 15:34 ` Scott Wood
2011-09-08 15:39 ` Alexander Graf
` (11 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Scott Wood @ 2011-09-08 15:34 UTC (permalink / raw)
To: kvm-ppc
On Wed, Sep 07, 2011 at 12:41:35PM +0200, Alexander Graf wrote:
> Yes, but why can't we do this in the vcpu thread's context so we only
> ever have a single instance accessing the vcpu struct? It makes a lot
> of things a lot easier.
Why? We don't do it for external interrupts. It would complicate
locking, not simplify it -- we'd need to defer the execution to some
context which can block, and then kick the guest out of guest mode, and
make sure it doesn't reenter before we get the mutex...
> And for the watchdog, we don't have to set the bit unless it's clean,
> right?
Not sure what you mean by "clean". We always set the bit when the timer
expires.
Likewise for the decrementer.
-Scott
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (8 preceding siblings ...)
2011-09-08 15:34 ` Scott Wood
@ 2011-09-08 15:39 ` Alexander Graf
2011-09-15 20:52 ` Scott Wood
` (10 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Alexander Graf @ 2011-09-08 15:39 UTC (permalink / raw)
To: kvm-ppc
On 08.09.2011, at 17:34, Scott Wood wrote:
> On Wed, Sep 07, 2011 at 12:41:35PM +0200, Alexander Graf wrote:
>> Yes, but why can't we do this in the vcpu thread's context so we only
>> ever have a single instance accessing the vcpu struct? It makes a lot
>> of things a lot easier.
>
> Why? We don't do it for external interrupts. It would complicate
> locking, not simplify it -- we'd need to defer the execution to some
> context which can block, and then kick the guest out of guest mode, and
> make sure it doesn't reenter before we get the mutex...
We need to kick the vcpu thread out of context anyways because we're otherwise delaying delivery of the decr interrupt. So we can just as well handle it all there then. Being lazy is good, but please not at the cost of latency.
>
>> And for the watchdog, we don't have to set the bit unless it's clean,
>> right?
>
> Not sure what you mean by "clean". We always set the bit when the timer
> expires.
>
> Likewise for the decrementer.
What I mean is that we don't want to have a timer periodically triggering in the background for watchdog when the guest never clears the watchdog delivered bit.
Alex
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (9 preceding siblings ...)
2011-09-08 15:39 ` Alexander Graf
@ 2011-09-15 20:52 ` Scott Wood
2011-09-19 9:32 ` Alexander Graf
` (9 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Scott Wood @ 2011-09-15 20:52 UTC (permalink / raw)
To: kvm-ppc
On 09/08/2011 10:39 AM, Alexander Graf wrote:
>
> On 08.09.2011, at 17:34, Scott Wood wrote:
>
>> On Wed, Sep 07, 2011 at 12:41:35PM +0200, Alexander Graf wrote:
>>> Yes, but why can't we do this in the vcpu thread's context so we only
>>> ever have a single instance accessing the vcpu struct? It makes a lot
>>> of things a lot easier.
>>
>> Why? We don't do it for external interrupts. It would complicate
>> locking, not simplify it -- we'd need to defer the execution to some
>> context which can block, and then kick the guest out of guest mode, and
>> make sure it doesn't reenter before we get the mutex...
>
> We need to kick the vcpu thread out of context anyways because we're
> otherwise delaying delivery of the decr interrupt. So we can just as
> well handle it all there then. Being lazy is good, but please not at
> the cost of latency.
That means we need a way to tell the thread to do this. Instead of
using set_bit(), you want dedicated variables for pending_decr,
pending_fit, pending_watchdog? And then we check them on every exit?
Besides avoiding the overhead of an atomic operation (premature
optimization), what does this buy us? The ability to move the
dequeue-after-mask from kvmppc_booke_irqprio_deliver() to
kvmppc_set_tcr() (only to be replaced by the check for pending_foo)?
How is using set_bits() for TSR any different from the set_bit() we use
in kvmppc_booke_queue_irqprio()?
>>> And for the watchdog, we don't have to set the bit unless it's clean,
>>> right?
>>
>> Not sure what you mean by "clean". We always set the bit when the timer
>> expires.
>>
>> Likewise for the decrementer.
>
> What I mean is that we don't want to have a timer periodically triggering in the background for watchdog when the guest never clears the watchdog delivered bit.
I suppose we could stop the timer if the watchdog has hit final
expiration with no action configured, and restart it if the bits get
cleared. It's not really a case that warrants optimization (in the
default case where the guest doesn't use the watchdog, the period will
be so high that the timer will never expire), but we do want to avoid a
situation where the guest sets a period that is too short and ends up
timer-storming the host.
-Scott
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (10 preceding siblings ...)
2011-09-15 20:52 ` Scott Wood
@ 2011-09-19 9:32 ` Alexander Graf
2011-09-19 19:12 ` Scott Wood
` (8 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Alexander Graf @ 2011-09-19 9:32 UTC (permalink / raw)
To: kvm-ppc
On 15.09.2011, at 22:52, Scott Wood wrote:
> On 09/08/2011 10:39 AM, Alexander Graf wrote:
>>
>> On 08.09.2011, at 17:34, Scott Wood wrote:
>>
>>> On Wed, Sep 07, 2011 at 12:41:35PM +0200, Alexander Graf wrote:
>>>> Yes, but why can't we do this in the vcpu thread's context so we only
>>>> ever have a single instance accessing the vcpu struct? It makes a lot
>>>> of things a lot easier.
>>>
>>> Why? We don't do it for external interrupts. It would complicate
>>> locking, not simplify it -- we'd need to defer the execution to some
>>> context which can block, and then kick the guest out of guest mode, and
>>> make sure it doesn't reenter before we get the mutex...
>>
>> We need to kick the vcpu thread out of context anyways because we're
>> otherwise delaying delivery of the decr interrupt. So we can just as
>> well handle it all there then. Being lazy is good, but please not at
>> the cost of latency.
>
> That means we need a way to tell the thread to do this. Instead of
> using set_bit(), you want dedicated variables for pending_decr,
> pending_fit, pending_watchdog? And then we check them on every exit?
Well, whatever we do we have to kvm_vcpu_kick() the receiving vcpu out of context. So we can just as well tell it to inject its own interrupt and don't have to deal with atomic operations in most cases then, no?
>
> Besides avoiding the overhead of an atomic operation (premature
> optimization), what does this buy us? The ability to move the
> dequeue-after-mask from kvmppc_booke_irqprio_deliver() to
> kvmppc_set_tcr() (only to be replaced by the check for pending_foo)?
>
> How is using set_bits() for TSR any different from the set_bit() we use
> in kvmppc_booke_queue_irqprio()?
It keeps guest visible registers vcpu private. I want the vcpu thread to own all vcpu registers - and TSR is a vcpu register.
>
>>>> And for the watchdog, we don't have to set the bit unless it's clean,
>>>> right?
>>>
>>> Not sure what you mean by "clean". We always set the bit when the timer
>>> expires.
>>>
>>> Likewise for the decrementer.
>>
>> What I mean is that we don't want to have a timer periodically triggering in the background for watchdog when the guest never clears the watchdog delivered bit.
>
> I suppose we could stop the timer if the watchdog has hit final
> expiration with no action configured, and restart it if the bits get
> cleared. It's not really a case that warrants optimization (in the
> default case where the guest doesn't use the watchdog, the period will
> be so high that the timer will never expire), but we do want to avoid a
> situation where the guest sets a period that is too short and ends up
> timer-storming the host.
or where the guest simply has the watchdog only used once, then keeps the irq masked and we still happily burn CPU by triggering a timer when it's not even affecting state. Yup :)
Alex
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (11 preceding siblings ...)
2011-09-19 9:32 ` Alexander Graf
@ 2011-09-19 19:12 ` Scott Wood
2011-09-24 7:27 ` Alexander Graf
` (7 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Scott Wood @ 2011-09-19 19:12 UTC (permalink / raw)
To: kvm-ppc
On 09/19/2011 04:32 AM, Alexander Graf wrote:
>
> On 15.09.2011, at 22:52, Scott Wood wrote:
>
>> On 09/08/2011 10:39 AM, Alexander Graf wrote:
>>>
>>> On 08.09.2011, at 17:34, Scott Wood wrote:
>>>
>>>> On Wed, Sep 07, 2011 at 12:41:35PM +0200, Alexander Graf wrote:
>>>>> Yes, but why can't we do this in the vcpu thread's context so we only
>>>>> ever have a single instance accessing the vcpu struct? It makes a lot
>>>>> of things a lot easier.
>>>>
>>>> Why? We don't do it for external interrupts. It would complicate
>>>> locking, not simplify it -- we'd need to defer the execution to some
>>>> context which can block, and then kick the guest out of guest mode, and
>>>> make sure it doesn't reenter before we get the mutex...
>>>
>>> We need to kick the vcpu thread out of context anyways because we're
>>> otherwise delaying delivery of the decr interrupt. So we can just as
>>> well handle it all there then. Being lazy is good, but please not at
>>> the cost of latency.
>>
>> That means we need a way to tell the thread to do this. Instead of
>> using set_bit(), you want dedicated variables for pending_decr,
>> pending_fit, pending_watchdog? And then we check them on every exit?
>
> Well, whatever we do we have to kvm_vcpu_kick() the receiving vcpu
> out of context.
Yes, the point is how the information about the event is communicated.
> So we can just as well tell it to inject its own interrupt
> and don't have to deal with atomic operations in most cases then, no?
Either we use atomics, or we split each event into its own variable. We
already use atomics for injecting other interrupts.
It seems simpler to just use the architected format rather than invent
something new. And if we do something non-architected, that makes it
more likely that injection of the interrupt triggered by qemu setting
the bit with sregs is broken (untested special case).
>> Besides avoiding the overhead of an atomic operation (premature
>> optimization), what does this buy us? The ability to move the
>> dequeue-after-mask from kvmppc_booke_irqprio_deliver() to
>> kvmppc_set_tcr() (only to be replaced by the check for pending_foo)?
>>
>> How is using set_bits() for TSR any different from the set_bit() we use
>> in kvmppc_booke_queue_irqprio()?
>
> It keeps guest visible registers vcpu private. I want the vcpu thread to own all vcpu registers - and TSR is a vcpu register.
It is private to the vcpu implementation. I don't get why it should be
private to the thread just because it's an architected register -- would
it be any different if we called it "not_tsr" (or "pending_timers")
whose field "not_dis" (or a bit called BOOKE_TIMER_DECR) happened to be
at the right location, and we happen to be able to very easily turn it
into tsr when the guest wants to read it? :-)
-Scott
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (12 preceding siblings ...)
2011-09-19 19:12 ` Scott Wood
@ 2011-09-24 7:27 ` Alexander Graf
2011-09-27 0:44 ` Scott Wood
` (6 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Alexander Graf @ 2011-09-24 7:27 UTC (permalink / raw)
To: kvm-ppc
On 19.09.2011, at 21:12, Scott Wood wrote:
> On 09/19/2011 04:32 AM, Alexander Graf wrote:
>>
>> On 15.09.2011, at 22:52, Scott Wood wrote:
>>
>>> On 09/08/2011 10:39 AM, Alexander Graf wrote:
>>>>
>>>> On 08.09.2011, at 17:34, Scott Wood wrote:
>>>>
>>>>> On Wed, Sep 07, 2011 at 12:41:35PM +0200, Alexander Graf wrote:
>>>>>> Yes, but why can't we do this in the vcpu thread's context so we only
>>>>>> ever have a single instance accessing the vcpu struct? It makes a lot
>>>>>> of things a lot easier.
>>>>>
>>>>> Why? We don't do it for external interrupts. It would complicate
>>>>> locking, not simplify it -- we'd need to defer the execution to some
>>>>> context which can block, and then kick the guest out of guest mode, and
>>>>> make sure it doesn't reenter before we get the mutex...
>>>>
>>>> We need to kick the vcpu thread out of context anyways because we're
>>>> otherwise delaying delivery of the decr interrupt. So we can just as
>>>> well handle it all there then. Being lazy is good, but please not at
>>>> the cost of latency.
>>>
>>> That means we need a way to tell the thread to do this. Instead of
>>> using set_bit(), you want dedicated variables for pending_decr,
>>> pending_fit, pending_watchdog? And then we check them on every exit?
>>
>> Well, whatever we do we have to kvm_vcpu_kick() the receiving vcpu
>> out of context.
>
> Yes, the point is how the information about the event is communicated.
>
>> So we can just as well tell it to inject its own interrupt
>> and don't have to deal with atomic operations in most cases then, no?
>
> Either we use atomics, or we split each event into its own variable. We
> already use atomics for injecting other interrupts.
>
> It seems simpler to just use the architected format rather than invent
> something new. And if we do something non-architected, that makes it
> more likely that injection of the interrupt triggered by qemu setting
> the bit with sregs is broken (untested special case).
>
>>> Besides avoiding the overhead of an atomic operation (premature
>>> optimization), what does this buy us? The ability to move the
>>> dequeue-after-mask from kvmppc_booke_irqprio_deliver() to
>>> kvmppc_set_tcr() (only to be replaced by the check for pending_foo)?
>>>
>>> How is using set_bits() for TSR any different from the set_bit() we use
>>> in kvmppc_booke_queue_irqprio()?
>>
>> It keeps guest visible registers vcpu private. I want the vcpu thread to own all vcpu registers - and TSR is a vcpu register.
>
> It is private to the vcpu implementation. I don't get why it should be
> private to the thread just because it's an architected register -- would
> it be any different if we called it "not_tsr" (or "pending_timers")
> whose field "not_dis" (or a bit called BOOKE_TIMER_DECR) happened to be
> at the right location, and we happen to be able to very easily turn it
> into tsr when the guest wants to read it? :-)
I think I'm getting your point. So what we want is:
in timer handler:
set_bit(TSR_DIS, vcpu->arch.tsr);
kvm_make_request(KVM_REQ_PPC_TSR_UPDATE, vcpu);
kvm_vcpu_kick(vcpu);
in vcpu entry code:
if (vcpu->requests)
if (kvm_check_request(KVM_REQ_PPC_TSR_UPDATE, vcpu))
kvmppc_update_tsr(vcpu);
void kvmppc_update_tsr(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.tsr & TSR_DIS &&
vcpu->arch.tcr & TCR_DIE) {
kvmppc_core_queue_dec(vcpu);
}
// XXX also implement dequeue!
}
in emulate code:
case SPR_TSR:
vcpu->arch.tsr &= ~TSR_DIS;
kvmppc_update_tsr(vcpu);
Right?
Alex
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (13 preceding siblings ...)
2011-09-24 7:27 ` Alexander Graf
@ 2011-09-27 0:44 ` Scott Wood
2011-09-27 8:14 ` Alexander Graf
` (5 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Scott Wood @ 2011-09-27 0:44 UTC (permalink / raw)
To: kvm-ppc
On 09/24/2011 02:27 AM, Alexander Graf wrote:
> I think I'm getting your point. So what we want is:
>
> in timer handler:
>
> set_bit(TSR_DIS, vcpu->arch.tsr);
> kvm_make_request(KVM_REQ_PPC_TSR_UPDATE, vcpu);
> kvm_vcpu_kick(vcpu);
>
> in vcpu entry code:
>
> if (vcpu->requests)
> if (kvm_check_request(KVM_REQ_PPC_TSR_UPDATE, vcpu))
> kvmppc_update_tsr(vcpu);
>
> void kvmppc_update_tsr(struct kvm_vcpu *vcpu)
> {
> if (vcpu->arch.tsr & TSR_DIS &&
> vcpu->arch.tcr & TCR_DIE) {
> kvmppc_core_queue_dec(vcpu);
> }
> // XXX also implement dequeue!
> }
OK, this avoids the possibility for the guest to clear DIS/DIE after
enqueue but before delivery (which in the current patch is dealt with by
an extra check at delivery), and we only have to check one thing in the
common nothing-requested case (even if more things use this mechanism,
as long as we don't run out of bits).
If we convert external IRQs to using this as well, and expand the timer
case to include non-booke (maybe just use KVM_REQ_PENDING_TIMER?), we
should be able to de-atomicize pending_exceptions (which should be a
bigger win than de-atomicizing arch.tsr).
> case SPR_TSR:
> vcpu->arch.tsr &= ~TSR_DIS;
> kvmppc_update_tsr(vcpu);
Well, we should probably replace TSR_DIS here with the actual value that
the guest wrote. :-)
-Scott
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (14 preceding siblings ...)
2011-09-27 0:44 ` Scott Wood
@ 2011-09-27 8:14 ` Alexander Graf
2011-09-27 16:01 ` Bhushan Bharat-R65777
` (4 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Alexander Graf @ 2011-09-27 8:14 UTC (permalink / raw)
To: kvm-ppc
On 27.09.2011, at 02:44, Scott Wood wrote:
> On 09/24/2011 02:27 AM, Alexander Graf wrote:
>> I think I'm getting your point. So what we want is:
>>
>> in timer handler:
>>
>> set_bit(TSR_DIS, vcpu->arch.tsr);
>> kvm_make_request(KVM_REQ_PPC_TSR_UPDATE, vcpu);
>> kvm_vcpu_kick(vcpu);
>>
>> in vcpu entry code:
>>
>> if (vcpu->requests)
>> if (kvm_check_request(KVM_REQ_PPC_TSR_UPDATE, vcpu))
>> kvmppc_update_tsr(vcpu);
>>
>> void kvmppc_update_tsr(struct kvm_vcpu *vcpu)
>> {
>> if (vcpu->arch.tsr & TSR_DIS &&
>> vcpu->arch.tcr & TCR_DIE) {
>> kvmppc_core_queue_dec(vcpu);
>> }
>> // XXX also implement dequeue!
>> }
>
> OK, this avoids the possibility for the guest to clear DIS/DIE after
> enqueue but before delivery (which in the current patch is dealt with by
> an extra check at delivery), and we only have to check one thing in the
> common nothing-requested case (even if more things use this mechanism,
> as long as we don't run out of bits).
>
> If we convert external IRQs to using this as well, and expand the timer
> case to include non-booke (maybe just use KVM_REQ_PENDING_TIMER?), we
> should be able to de-atomicize pending_exceptions (which should be a
> bigger win than de-atomicizing arch.tsr).
>
>> case SPR_TSR:
>> vcpu->arch.tsr &= ~TSR_DIS;
>> kvmppc_update_tsr(vcpu);
>
> Well, we should probably replace TSR_DIS here with the actual value that
> the guest wrote. :-)
Yes, and use the atomic operation. This is all just pseudo-code.
I like the idea of moving pending_exceptions off of atomic. This really is a private field that no other vcpu should have access to. We only have to think hard about how to implement SET_INTERRUPT for another vcpu then. But I think this could just be another kvm_make_request() type.
Alex
^ permalink raw reply [flat|nested] 22+ messages in thread* RE: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (15 preceding siblings ...)
2011-09-27 8:14 ` Alexander Graf
@ 2011-09-27 16:01 ` Bhushan Bharat-R65777
2011-09-27 16:09 ` Alexander Graf
` (3 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Bhushan Bharat-R65777 @ 2011-09-27 16:01 UTC (permalink / raw)
To: kvm-ppc
> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
> owner@vger.kernel.org] On Behalf Of Alexander Graf
> Sent: Tuesday, September 27, 2011 1:45 PM
> To: Wood Scott-B07421
> Cc: Liu Yu-B13201; Wood Scott-B07421; kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register
> emulation
>
>
> On 27.09.2011, at 02:44, Scott Wood wrote:
>
> > On 09/24/2011 02:27 AM, Alexander Graf wrote:
> >> I think I'm getting your point. So what we want is:
> >>
> >> in timer handler:
> >>
> >> set_bit(TSR_DIS, vcpu->arch.tsr);
> >> kvm_make_request(KVM_REQ_PPC_TSR_UPDATE, vcpu);
> >> kvm_vcpu_kick(vcpu);
Still there can be a case where first request not handled and another event happens? Or we would like to pause till first request is handled by vcpu?
Thanks
-Bharat
> >>
> >> in vcpu entry code:
> >>
> >> if (vcpu->requests)
> >> if (kvm_check_request(KVM_REQ_PPC_TSR_UPDATE, vcpu))
> >> kvmppc_update_tsr(vcpu);
> >>
> >> void kvmppc_update_tsr(struct kvm_vcpu *vcpu) {
> >> if (vcpu->arch.tsr & TSR_DIS &&
> >> vcpu->arch.tcr & TCR_DIE) {
> >> kvmppc_core_queue_dec(vcpu);
> >> }
> >> // XXX also implement dequeue!
> >> }
> >
> > OK, this avoids the possibility for the guest to clear DIS/DIE after
> > enqueue but before delivery (which in the current patch is dealt with
> > by an extra check at delivery), and we only have to check one thing in
> > the common nothing-requested case (even if more things use this
> > mechanism, as long as we don't run out of bits).
> >
> > If we convert external IRQs to using this as well, and expand the
> > timer case to include non-booke (maybe just use
> > KVM_REQ_PENDING_TIMER?), we should be able to de-atomicize
> > pending_exceptions (which should be a bigger win than de-atomicizing
> arch.tsr).
> >
> >> case SPR_TSR:
> >> vcpu->arch.tsr &= ~TSR_DIS;
> >> kvmppc_update_tsr(vcpu);
> >
> > Well, we should probably replace TSR_DIS here with the actual value
> > that the guest wrote. :-)
>
> Yes, and use the atomic operation. This is all just pseudo-code.
>
> I like the idea of moving pending_exceptions off of atomic. This really
> is a private field that no other vcpu should have access to. We only have
> to think hard about how to implement SET_INTERRUPT for another vcpu then.
> But I think this could just be another kvm_make_request() type.
>
>
> Alex
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the
> body of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (16 preceding siblings ...)
2011-09-27 16:01 ` Bhushan Bharat-R65777
@ 2011-09-27 16:09 ` Alexander Graf
2011-09-27 16:21 ` Bhushan Bharat-R65777
` (2 subsequent siblings)
20 siblings, 0 replies; 22+ messages in thread
From: Alexander Graf @ 2011-09-27 16:09 UTC (permalink / raw)
To: kvm-ppc
On 27.09.2011, at 18:01, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
>> owner@vger.kernel.org] On Behalf Of Alexander Graf
>> Sent: Tuesday, September 27, 2011 1:45 PM
>> To: Wood Scott-B07421
>> Cc: Liu Yu-B13201; Wood Scott-B07421; kvm-ppc@vger.kernel.org
>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register
>> emulation
>>
>>
>> On 27.09.2011, at 02:44, Scott Wood wrote:
>>
>>> On 09/24/2011 02:27 AM, Alexander Graf wrote:
>>>> I think I'm getting your point. So what we want is:
>>>>
>>>> in timer handler:
>>>>
>>>> set_bit(TSR_DIS, vcpu->arch.tsr);
>>>> kvm_make_request(KVM_REQ_PPC_TSR_UPDATE, vcpu);
>>>> kvm_vcpu_kick(vcpu);
>
> Still there can be a case where first request not handled and another event happens? Or we would like to pause till first request is handled by vcpu?
If the first DIS request is not handled and another event happens, the interrupts get coalesced (like on real hardware). If there is another TSR bit set still, int_pending should still be active and the guest exits the next time it sets MSR_EE, making us inject the next interrupt.
Alex
^ permalink raw reply [flat|nested] 22+ messages in thread* RE: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (17 preceding siblings ...)
2011-09-27 16:09 ` Alexander Graf
@ 2011-09-27 16:21 ` Bhushan Bharat-R65777
2011-09-27 16:32 ` Alexander Graf
2011-09-27 16:45 ` Scott Wood
20 siblings, 0 replies; 22+ messages in thread
From: Bhushan Bharat-R65777 @ 2011-09-27 16:21 UTC (permalink / raw)
To: kvm-ppc
> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
> owner@vger.kernel.org] On Behalf Of Alexander Graf
> Sent: Tuesday, September 27, 2011 9:39 PM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; Liu Yu-B13201; kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register
> emulation
>
>
> On 27.09.2011, at 18:01, Bhushan Bharat-R65777 wrote:
>
> >
> >
> >> -----Original Message-----
> >> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
> >> owner@vger.kernel.org] On Behalf Of Alexander Graf
> >> Sent: Tuesday, September 27, 2011 1:45 PM
> >> To: Wood Scott-B07421
> >> Cc: Liu Yu-B13201; Wood Scott-B07421; kvm-ppc@vger.kernel.org
> >> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register
> >> emulation
> >>
> >>
> >> On 27.09.2011, at 02:44, Scott Wood wrote:
> >>
> >>> On 09/24/2011 02:27 AM, Alexander Graf wrote:
> >>>> I think I'm getting your point. So what we want is:
> >>>>
> >>>> in timer handler:
> >>>>
> >>>> set_bit(TSR_DIS, vcpu->arch.tsr);
> >>>> kvm_make_request(KVM_REQ_PPC_TSR_UPDATE, vcpu);
> >>>> kvm_vcpu_kick(vcpu);
> >
> > Still there can be a case where first request not handled and another
> event happens? Or we would like to pause till first request is handled by
> vcpu?
>
> If the first DIS request is not handled and another event happens, the
> interrupts get coalesced (like on real hardware). If there is another TSR
> bit set still, int_pending should still be active and the guest exits the
> next time it sets MSR_EE, making us inject the next interrupt.
>
This may not work for watchdog, where every event causes a state change.
Thanks
-Bharat
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (18 preceding siblings ...)
2011-09-27 16:21 ` Bhushan Bharat-R65777
@ 2011-09-27 16:32 ` Alexander Graf
2011-09-27 16:45 ` Scott Wood
20 siblings, 0 replies; 22+ messages in thread
From: Alexander Graf @ 2011-09-27 16:32 UTC (permalink / raw)
To: kvm-ppc
On 27.09.2011, at 18:21, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
>> owner@vger.kernel.org] On Behalf Of Alexander Graf
>> Sent: Tuesday, September 27, 2011 9:39 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; Liu Yu-B13201; kvm-ppc@vger.kernel.org
>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register
>> emulation
>>
>>
>> On 27.09.2011, at 18:01, Bhushan Bharat-R65777 wrote:
>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
>>>> owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>> Sent: Tuesday, September 27, 2011 1:45 PM
>>>> To: Wood Scott-B07421
>>>> Cc: Liu Yu-B13201; Wood Scott-B07421; kvm-ppc@vger.kernel.org
>>>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register
>>>> emulation
>>>>
>>>>
>>>> On 27.09.2011, at 02:44, Scott Wood wrote:
>>>>
>>>>> On 09/24/2011 02:27 AM, Alexander Graf wrote:
>>>>>> I think I'm getting your point. So what we want is:
>>>>>>
>>>>>> in timer handler:
>>>>>>
>>>>>> set_bit(TSR_DIS, vcpu->arch.tsr);
>>>>>> kvm_make_request(KVM_REQ_PPC_TSR_UPDATE, vcpu);
>>>>>> kvm_vcpu_kick(vcpu);
>>>
>>> Still there can be a case where first request not handled and another
>> event happens? Or we would like to pause till first request is handled by
>> vcpu?
>>
>> If the first DIS request is not handled and another event happens, the
>> interrupts get coalesced (like on real hardware). If there is another TSR
>> bit set still, int_pending should still be active and the guest exits the
>> next time it sets MSR_EE, making us inject the next interrupt.
>>
>
> This may not work for watchdog, where every event causes a state change.
Not sure I understand. What state does it change except for the TSR bit?
Alex
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation
2011-08-26 23:31 [PATCH 5/5] KVM: PPC: booke: Improve timer register emulation Scott Wood
` (19 preceding siblings ...)
2011-09-27 16:32 ` Alexander Graf
@ 2011-09-27 16:45 ` Scott Wood
20 siblings, 0 replies; 22+ messages in thread
From: Scott Wood @ 2011-09-27 16:45 UTC (permalink / raw)
To: kvm-ppc
On 09/27/2011 11:21 AM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
>> owner@vger.kernel.org] On Behalf Of Alexander Graf
>> Sent: Tuesday, September 27, 2011 9:39 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; Liu Yu-B13201; kvm-ppc@vger.kernel.org
>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register
>> emulation
>>
>>
>> On 27.09.2011, at 18:01, Bhushan Bharat-R65777 wrote:
>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-
>>>> owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>> Sent: Tuesday, September 27, 2011 1:45 PM
>>>> To: Wood Scott-B07421
>>>> Cc: Liu Yu-B13201; Wood Scott-B07421; kvm-ppc@vger.kernel.org
>>>> Subject: Re: [PATCH 5/5] KVM: PPC: booke: Improve timer register
>>>> emulation
>>>>
>>>>
>>>> On 27.09.2011, at 02:44, Scott Wood wrote:
>>>>
>>>>> On 09/24/2011 02:27 AM, Alexander Graf wrote:
>>>>>> I think I'm getting your point. So what we want is:
>>>>>>
>>>>>> in timer handler:
>>>>>>
>>>>>> set_bit(TSR_DIS, vcpu->arch.tsr);
>>>>>> kvm_make_request(KVM_REQ_PPC_TSR_UPDATE, vcpu);
>>>>>> kvm_vcpu_kick(vcpu);
>>>
>>> Still there can be a case where first request not handled and another
>> event happens? Or we would like to pause till first request is handled by
>> vcpu?
>>
>> If the first DIS request is not handled and another event happens, the
>> interrupts get coalesced (like on real hardware). If there is another TSR
>> bit set still, int_pending should still be active and the guest exits the
>> next time it sets MSR_EE, making us inject the next interrupt.
>>
>
> This may not work for watchdog, where every event causes a state change.
For watchdog, this would only be used for delivering the interrupt (or a
final expiration exit to qemu, which would be a separate request bit).
ENW/WIS would be handled from timer context, like DIS is here.
The watchdog timer could use a compare and swap to avoid this sort of race:
vcpu thread watchdog timer
----------- --------------
*expires*
set ENW
*expires*
see ENW set
clear ENW+WIS
set WIS
sees WIS but not ENW
...if we care. It's something that shouldn't be possible in hardware,
but not of much consequence (the guest is unlikely to clear ENW+WIS one
time but only ENW the next). Not hard to avoid either, though.
Other races are possible, but (unless I'm missing one) they don't
produce results that couldn't have been produced by the vcpu thread
being a few cycles slower.
-Scott
^ permalink raw reply [flat|nested] 22+ messages in thread