From: Gleb Natapov <gleb@redhat.com>
To: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org
Subject: Re: [PATCH 08/10] Move IO APIC to its own lock.
Date: Wed, 15 Jul 2009 23:48:17 +0300 [thread overview]
Message-ID: <20090715204817.GE8356@redhat.com> (raw)
In-Reply-To: <20090715175759.GA5689@amt.cnet>
On Wed, Jul 15, 2009 at 02:57:59PM -0300, Marcelo Tosatti wrote:
> On Tue, Jul 14, 2009 at 05:30:43PM +0300, Gleb Natapov wrote:
> >
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> > arch/ia64/kvm/kvm-ia64.c | 27 ++++++++++++++++++------
> > arch/x86/kvm/lapic.c | 5 +---
> > arch/x86/kvm/x86.c | 30 ++++++++++++++++++---------
> > virt/kvm/ioapic.c | 50 ++++++++++++++++++++++++++++-----------------
> > virt/kvm/ioapic.h | 1 +
> > 5 files changed, 73 insertions(+), 40 deletions(-)
> >
> > diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> > index 0ad09f0..dd7ef2d 100644
> > --- a/arch/ia64/kvm/kvm-ia64.c
> > +++ b/arch/ia64/kvm/kvm-ia64.c
> > @@ -850,9 +850,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
> >
> > r = 0;
> > switch (chip->chip_id) {
> > - case KVM_IRQCHIP_IOAPIC:
> > - memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
> > - sizeof(struct kvm_ioapic_state));
> > + case KVM_IRQCHIP_IOAPIC: {
> > + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> > + if (ioapic) {
> > + spin_lock(&ioapic->lock);
> > + memcpy(&chip->chip.ioapic, ioapic,
> > + sizeof(struct kvm_ioapic_state));
> > + spin_unlock(&ioapic->lock);
> > + } else
> > + r = -EINVAL;
> > + }
> > break;
> > default:
> > r = -EINVAL;
> > @@ -867,10 +874,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
> >
> > r = 0;
> > switch (chip->chip_id) {
> > - case KVM_IRQCHIP_IOAPIC:
> > - memcpy(ioapic_irqchip(kvm),
> > - &chip->chip.ioapic,
> > - sizeof(struct kvm_ioapic_state));
> > + case KVM_IRQCHIP_IOAPIC: {
> > + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> > + if (ioapic) {
> > + spin_lock(&ioapic->lock);
> > + memcpy(ioapic, &chip->chip.ioapic,
> > + sizeof(struct kvm_ioapic_state));
> > + spin_unlock(&ioapic->lock);
> > + } else
> > + r = -EINVAL;
> > + }
> > break;
> > default:
> > r = -EINVAL;
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index e2e2849..61ffcfc 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -471,11 +471,8 @@ static void apic_set_eoi(struct kvm_lapic *apic)
> > trigger_mode = IOAPIC_LEVEL_TRIG;
> > else
> > trigger_mode = IOAPIC_EDGE_TRIG;
> > - if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
> > - mutex_lock(&apic->vcpu->kvm->irq_lock);
> > + if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
> > kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
> > - mutex_unlock(&apic->vcpu->kvm->irq_lock);
> > - }
> > }
> >
> > static void apic_send_ipi(struct kvm_lapic *apic)
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 48567fa..de99b84 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -2010,10 +2010,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
> > &pic_irqchip(kvm)->pics[1],
> > sizeof(struct kvm_pic_state));
> > break;
> > - case KVM_IRQCHIP_IOAPIC:
> > - memcpy(&chip->chip.ioapic,
> > - ioapic_irqchip(kvm),
> > - sizeof(struct kvm_ioapic_state));
> > + case KVM_IRQCHIP_IOAPIC: {
> > + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> > + if (ioapic) {
> > + spin_lock(&ioapic->lock);
> > + memcpy(&chip->chip.ioapic, ioapic,
> > + sizeof(struct kvm_ioapic_state));
> > + spin_unlock(&ioapic->lock);
> > + } else
> > + r = -EINVAL;
> > + }
> > break;
> > default:
> > r = -EINVAL;
> > @@ -2042,12 +2048,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
> > sizeof(struct kvm_pic_state));
> > spin_unlock(&pic_irqchip(kvm)->lock);
> > break;
> > - case KVM_IRQCHIP_IOAPIC:
> > - mutex_lock(&kvm->irq_lock);
> > - memcpy(ioapic_irqchip(kvm),
> > - &chip->chip.ioapic,
> > - sizeof(struct kvm_ioapic_state));
> > - mutex_unlock(&kvm->irq_lock);
> > + case KVM_IRQCHIP_IOAPIC: {
> > + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> > + if (ioapic) {
> > + spin_lock(&ioapic->lock);
> > + memcpy(ioapic, &chip->chip.ioapic,
> > + sizeof(struct kvm_ioapic_state));
> > + spin_unlock(&ioapic->lock);
> > + } else
> > + r = -EINVAL;
> > + }
> > break;
> > default:
> > r = -EINVAL;
> > diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
> > index fa05f67..300ee3b 100644
> > --- a/virt/kvm/ioapic.c
> > +++ b/virt/kvm/ioapic.c
> > @@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
> > union kvm_ioapic_redirect_entry entry;
> > int ret = 1;
> >
> > + spin_lock(&ioapic->lock);
> > if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
> > entry = ioapic->redirtbl[irq];
> > level ^= entry.fields.polarity;
> > @@ -196,34 +197,44 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
> > }
> > trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
> > }
> > + spin_unlock(&ioapic->lock);
> > +
> > return ret;
> > }
> >
> > -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
> > - int trigger_mode)
> > +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
> > + int trigger_mode)
> > {
> > - union kvm_ioapic_redirect_entry *ent;
> > + int i;
> > +
> > + for (i = 0; i < IOAPIC_NUM_PINS; i++) {
> > + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
> > +
> > + if (ent->fields.vector != vector)
> > + continue;
> >
> > - ent = &ioapic->redirtbl[pin];
> > + /* ack notifier may call back into ioapic via kvm_set_irq() */
> > + spin_unlock(&ioapic->lock);
> > + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
> > + spin_lock(&ioapic->lock);
>
> While traversing the IOAPIC pins you drop the lock and acquire it again,
> which is error prone: what if the redirection table is changed in
> between, for example?
>
Yes, the ack notifiers is a problematic part. The only thing that
current ack notifiers do is set_irq_level(0) so this is not the problem
in practice. I'll see if I can eliminate this dropping of the lock here.
> Also, irq_lock was held during ack and mask notifier callbacks, so they
> (the callbacks) did not have to worry about concurrency.
>
Is it possible to have more then one ack for the same interrupt line at
the same time?
> You questioned the purpose of irq_lock earlier, and thats what it is:
> synchronization between pic and ioapic blur at the irq notifiers.
>
Pic has its own locking and it fails miserably in regards ack notifiers.
I bet nobody tried device assignment with pic. It will not work.
irq_lock is indeed used to synchronization ioapic access, but the way
it is done requires calling kvm_set_irq() with lock held and this adds
unnecessary lock on a msi irq injection path.
> Using RCU to synchronize ack/mask notifier list walk versus list
> addition is good, but i'm not entirely sure that smaller locks like you
> are proposing is a benefit long term (things become much more tricky).
Without removing irq_lock RCU is useless since the list walking is always
done with irq_lock held. I see smaller locks as simplification BTW. The
thing is tricky now, or so I felt while trying to understand what
irq_lock actually protects. With smaller locks with names that clearly
indicates which data structure it protects I feel the code is much
easy to understand.
>
> Maybe it is the only way forward (and so you push this complexity to the
> ack/mask notifiers), but i think it can be avoided until its proven that
> there is contention on irq_lock (which is reduced by faster search).
This is not about contention. This is about existence of the lock in the
first place. With the patch set there is no lock at msi irq injection
path at all and this is better than having lock no matter if the lock is
congested or not. There is still a lock on ioapic path (which MSI does
not use) and removing of this lock should be done only after we see
that it is congested, because removing it involves adding a number of
atomic accesses, so it is not clear win without lock contention.
(removing it will also solve ack notification problem hmmm)
>
>
> > - kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
> > + if (trigger_mode != IOAPIC_LEVEL_TRIG)
> > + continue;
> >
> > - if (trigger_mode == IOAPIC_LEVEL_TRIG) {
> > ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
> > ent->fields.remote_irr = 0;
> > - if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
> > - ioapic_service(ioapic, pin);
> > + if (!ent->fields.mask && (ioapic->irr & (1 << i)))
> > + ioapic_service(ioapic, i);
> > }
> > }
> >
> > void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
> > {
> > struct kvm_ioapic *ioapic = kvm->arch.vioapic;
> > - int i;
> >
> > - for (i = 0; i < IOAPIC_NUM_PINS; i++)
> > - if (ioapic->redirtbl[i].fields.vector == vector)
> > - __kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
> > + spin_lock(&ioapic->lock);
> > + __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
> > + spin_unlock(&ioapic->lock);
> > }
> >
> > static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
> > @@ -248,8 +259,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> > ioapic_debug("addr %lx\n", (unsigned long)addr);
> > ASSERT(!(addr & 0xf)); /* check alignment */
> >
> > - mutex_lock(&ioapic->kvm->irq_lock);
> > addr &= 0xff;
> > + spin_lock(&ioapic->lock);
> > switch (addr) {
> > case IOAPIC_REG_SELECT:
> > result = ioapic->ioregsel;
> > @@ -263,6 +274,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> > result = 0;
> > break;
> > }
> > + spin_unlock(&ioapic->lock);
> > +
> > switch (len) {
> > case 8:
> > *(u64 *) val = result;
> > @@ -275,7 +288,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
> > default:
> > printk(KERN_WARNING "ioapic: wrong length %d\n", len);
> > }
> > - mutex_unlock(&ioapic->kvm->irq_lock);
> > return 0;
> > }
> >
> > @@ -291,15 +303,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> > (void*)addr, len, val);
> > ASSERT(!(addr & 0xf)); /* check alignment */
> >
> > - mutex_lock(&ioapic->kvm->irq_lock);
> > if (len == 4 || len == 8)
> > data = *(u32 *) val;
> > else {
> > printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
> > - goto unlock;
> > + return 0;
> > }
> >
> > addr &= 0xff;
> > + spin_lock(&ioapic->lock);
> > switch (addr) {
> > case IOAPIC_REG_SELECT:
> > ioapic->ioregsel = data;
> > @@ -310,15 +322,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
> > break;
> > #ifdef CONFIG_IA64
> > case IOAPIC_REG_EOI:
> > - kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
> > + __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
> > break;
> > #endif
> >
> > default:
> > break;
> > }
> > -unlock:
> > - mutex_unlock(&ioapic->kvm->irq_lock);
> > + spin_unlock(&ioapic->lock);
> > return 0;
> > }
> >
> > @@ -347,6 +358,7 @@ int kvm_ioapic_init(struct kvm *kvm)
> > ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
> > if (!ioapic)
> > return -ENOMEM;
> > + spin_lock_init(&ioapic->lock);
> > kvm->arch.vioapic = ioapic;
> > kvm_ioapic_reset(ioapic);
> > kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
> > diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
> > index 7080b71..557107e 100644
> > --- a/virt/kvm/ioapic.h
> > +++ b/virt/kvm/ioapic.h
> > @@ -44,6 +44,7 @@ struct kvm_ioapic {
> > struct kvm_io_device dev;
> > struct kvm *kvm;
> > void (*ack_notifier)(void *opaque, int irq);
> > + spinlock_t lock;
> > };
> >
> > #ifdef DEBUG
> > --
> > 1.6.2.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Gleb.
next prev parent reply other threads:[~2009-07-15 20:48 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-07-14 14:30 [PATCH 0/10] [RFC] more fine grained locking for IRQ injection Gleb Natapov
2009-07-14 14:30 ` [PATCH 01/10] Move irq routing data structure to rcu locking Gleb Natapov
2009-07-14 14:30 ` [PATCH 02/10] Unregister ack notifier callback on PIT freeing Gleb Natapov
2009-07-14 14:30 ` [PATCH 03/10] Move irq ack notifier list to arch independent code Gleb Natapov
2009-07-14 14:30 ` [PATCH 04/10] Convert irq notifiers lists to RCU locking Gleb Natapov
2009-07-14 14:30 ` [PATCH 05/10] Protect irq_sources_bitmap by kvm->lock instead of kvm->irq_lock Gleb Natapov
2009-07-14 14:30 ` [PATCH 06/10] Move irq routing to its own locking Gleb Natapov
2009-07-14 14:30 ` [PATCH 07/10] Move irq notifiers lists " Gleb Natapov
2009-07-14 14:30 ` [PATCH 08/10] Move IO APIC to its own lock Gleb Natapov
2009-07-15 17:57 ` Marcelo Tosatti
2009-07-15 20:48 ` Gleb Natapov [this message]
2009-07-15 21:38 ` Marcelo Tosatti
2009-07-16 7:51 ` Gleb Natapov
2009-07-16 18:09 ` Marcelo Tosatti
2009-07-16 20:49 ` Gleb Natapov
2009-07-17 15:19 ` Marcelo Tosatti
2009-07-17 17:32 ` Gleb Natapov
2009-07-17 20:09 ` Marcelo Tosatti
2009-07-14 14:30 ` [PATCH 09/10] Drop kvm->irq_lock lock Gleb Natapov
2009-07-14 14:30 ` [PATCH 10/10] Change irq routing table to use gsi indexed array Gleb Natapov
2009-07-15 18:18 ` Marcelo Tosatti
2009-07-15 20:52 ` Gleb Natapov
2009-07-15 21:42 ` Marcelo Tosatti
2009-07-16 6:05 ` Gleb Natapov
2009-07-15 19:17 ` Michael S. Tsirkin
2009-07-15 20:48 ` Gleb Natapov
-- strict thread matches above, loose matches on Subject: below --
2009-08-09 12:41 [PATCH 00/10] make interrupt injection lockless (almost) Gleb Natapov
2009-08-09 12:41 ` [PATCH 08/10] Move IO APIC to its own lock Gleb Natapov
2009-08-09 14:54 ` Avi Kivity
2009-08-09 14:57 ` Gleb Natapov
2009-08-09 15:10 ` Avi Kivity
2009-08-09 15:09 ` Gleb Natapov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090715204817.GE8356@redhat.com \
--to=gleb@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mtosatti@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.