* [PATCH 1/2] Remove kvm_commit_irq_routes from error messages @ 2012-05-24 17:02 Richard Weinberger 2012-05-24 17:02 ` [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting Richard Weinberger 0 siblings, 1 reply; 16+ messages in thread From: Richard Weinberger @ 2012-05-24 17:02 UTC (permalink / raw) To: kvm; +Cc: avi, mtosatti, tglx, Richard Weinberger Make my life a bit easier and report the correct function names. s/kvm_commit_irq_routes/kvm_irqchip_commit_routes Signed-off-by: Richard Weinberger <richard@nod.at> --- hw/device-assignment.c | 4 ++-- hw/msi.c | 2 +- hw/msix.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 1daadb9..09726f9 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -958,7 +958,7 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev) kvm_add_routing_entry(kvm_state, assigned_dev->entry); if (kvm_irqchip_commit_routes(kvm_state) < 0) { - perror("assigned_dev_update_msi: kvm_commit_irq_routes"); + perror("assigned_dev_update_msi: kvm_irqchip_commit_routes"); assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSI_ENABLED; return; } @@ -1053,7 +1053,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) } if (r == 0 && kvm_irqchip_commit_routes(kvm_state) < 0) { - perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes"); + perror("assigned_dev_update_msix_mmio: kvm_irqchip_commit_routes"); return -EINVAL; } diff --git a/hw/msi.c b/hw/msi.c index 4fcf769..1a20e83 100644 --- a/hw/msi.c +++ b/hw/msi.c @@ -180,7 +180,7 @@ static void kvm_msi_update(PCIDevice *dev) if (changed) { r = kvm_irqchip_commit_routes(kvm_state); if (r) { - fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__, + fprintf(stderr, "%s: kvm_irqchip_commit_routes failed: %s\n", __func__, strerror(-r)); exit(1); } diff --git a/hw/msix.c b/hw/msix.c index 5515a32..34b7455 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -91,7 +91,7 @@ static void kvm_msix_update(PCIDevice *dev, int vector, *entry = new_entry; r = kvm_irqchip_commit_routes(kvm_state); if (r) { - fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__, + fprintf(stderr, "%s: kvm_irqchip_commit_routes failed: %s\n", __func__, strerror(-r)); exit(1); } @@ -112,7 +112,7 @@ static int kvm_msix_vector_add(PCIDevice *dev, unsigned vector) r = kvm_irqchip_commit_routes(kvm_state); if (r < 0) { - fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__, strerror(-r)); + fprintf(stderr, "%s: kvm_irqchip_commit_routes failed: %s\n", __func__, strerror(-r)); return r; } return 0; -- 1.7.3.1 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 17:02 [PATCH 1/2] Remove kvm_commit_irq_routes from error messages Richard Weinberger @ 2012-05-24 17:02 ` Richard Weinberger 2012-05-24 18:20 ` Alex Williamson 2012-05-24 20:47 ` Jan Kiszka 0 siblings, 2 replies; 16+ messages in thread From: Richard Weinberger @ 2012-05-24 17:02 UTC (permalink / raw) To: kvm; +Cc: avi, mtosatti, tglx, Richard Weinberger MSI interrupt affinity setting on the guest ended always up on vcpu0, no matter what. IOW writes to /proc/irq/<IRQ>/smp_affinity are irgnored. This patch fixes the MSI IRQ routing and avoids the utter madness of tearing down and setting up the interrupt completely when this changes. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Richard Weinberger <richard@nod.at> --- hw/device-assignment.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 70 insertions(+), 3 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 09726f9..78d57c8 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -913,6 +913,50 @@ void assigned_dev_update_irqs(void) } } +static void assigned_dev_update_msi_route(PCIDevice *pci_dev) +{ + AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev); + uint8_t ctrl_byte = pci_get_byte(pci_dev->config + pci_dev->msi_cap + + PCI_MSI_FLAGS); + struct kvm_irq_routing_entry *old, new; + KVMMsiMessage msg; + int r; + + if (!(ctrl_byte & PCI_MSI_FLAGS_ENABLE)) + return; + + msg.addr_lo = pci_get_long(pci_dev->config + pci_dev->msi_cap + + PCI_MSI_ADDRESS_LO); + msg.addr_hi = pci_get_long(pci_dev->config + pci_dev->msi_cap + + PCI_MSI_ADDRESS_HI); + msg.data = pci_get_long(pci_dev->config + pci_dev->msi_cap + + PCI_MSI_DATA_32); + + old = adev->entry; + new = *old; + new.u.msi.address_lo = msg.addr_lo; + new.u.msi.address_hi = msg.addr_hi; + new.u.msi.data = msg.data; + + if (memcmp(old, &new, sizeof(new)) == 0) + return; + + r = kvm_update_routing_entry(old, &new); + if (r < 0) { + fprintf(stderr, "%s: kvm_update_msi failed: %s\n", __func__, + strerror(-r)); + exit(1); + } + + *old = new; + r = kvm_irqchip_commit_routes(kvm_state); + if (r) { + fprintf(stderr, "%s: kvm_irqchip_commit_routes failed: %s\n", __func__, + strerror(-r)); + exit(1); + } +} + static void assigned_dev_update_msi(PCIDevice *pci_dev) { struct kvm_assigned_irq assigned_irq_data; @@ -1116,6 +1160,14 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev, uint32_t virt_val = pci_default_read_config(pci_dev, address, len); uint32_t real_val, emulate_mask, full_emulation_mask; + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { + uint32_t msi_start = pci_dev->msi_cap; + uint32_t msi_end = msi_start + PCI_MSI_DATA_64 + 3; + + if (address >= msi_start && (address + len) < msi_end) + return virt_val; + } + emulate_mask = 0; memcpy(&emulate_mask, assigned_dev->emulate_config_read + address, len); emulate_mask = le32_to_cpu(emulate_mask); @@ -1130,6 +1182,17 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev, } } +static void handle_cfg_write_msi(PCIDevice *pci_dev, AssignedDevice *adev) +{ + if (!kvm_enabled() || !kvm_irqchip_in_kernel()) + return; + + if (adev->entry && (adev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSI)) + assigned_dev_update_msi_route(pci_dev); + else + assigned_dev_update_msi(pci_dev); +} + static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t val, int len) { @@ -1155,9 +1218,13 @@ static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, } } if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { - if (range_covers_byte(address, len, - pci_dev->msi_cap + PCI_MSI_FLAGS)) { - assigned_dev_update_msi(pci_dev); + uint32_t msi_start = pci_dev->msi_cap; + uint32_t msi_end = msi_start + PCI_MSI_DATA_64 + 3; + + if (address >= msi_start && (address + len) < msi_end) { + if (address == msi_start + PCI_MSI_DATA_32) + handle_cfg_write_msi(pci_dev, assigned_dev); + return; } } if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { -- 1.7.3.1 ^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 17:02 ` [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting Richard Weinberger @ 2012-05-24 18:20 ` Alex Williamson [not found] ` <CAEMbtc+ycsC6u=CZ_Yg6C=WV=VqjA2uEDM5KWPM_7n3sZh_9Pw@mail.gmail.com> 2012-05-24 21:39 ` Thomas Gleixner 2012-05-24 20:47 ` Jan Kiszka 1 sibling, 2 replies; 16+ messages in thread From: Alex Williamson @ 2012-05-24 18:20 UTC (permalink / raw) To: Richard Weinberger; +Cc: kvm, avi, mtosatti, tglx On Thu, 2012-05-24 at 18:02 +0100, Richard Weinberger wrote: > MSI interrupt affinity setting on the guest ended always up on vcpu0, > no matter what. > IOW writes to /proc/irq/<IRQ>/smp_affinity are irgnored. > This patch fixes the MSI IRQ routing and avoids the utter madness of > tearing down and setting up the interrupt completely when this changes. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Signed-off-by: Richard Weinberger <richard@nod.at> > --- > hw/device-assignment.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++-- > 1 files changed, 70 insertions(+), 3 deletions(-) > > diff --git a/hw/device-assignment.c b/hw/device-assignment.c > index 09726f9..78d57c8 100644 > --- a/hw/device-assignment.c > +++ b/hw/device-assignment.c > @@ -913,6 +913,50 @@ void assigned_dev_update_irqs(void) > } > } > > +static void assigned_dev_update_msi_route(PCIDevice *pci_dev) > +{ > + AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev); > + uint8_t ctrl_byte = pci_get_byte(pci_dev->config + pci_dev->msi_cap + > + PCI_MSI_FLAGS); > + struct kvm_irq_routing_entry *old, new; > + KVMMsiMessage msg; > + int r; Please follow qemu coding style for braces throughout. > + > + if (!(ctrl_byte & PCI_MSI_FLAGS_ENABLE)) > + return; > + > + msg.addr_lo = pci_get_long(pci_dev->config + pci_dev->msi_cap + > + PCI_MSI_ADDRESS_LO); > + msg.addr_hi = pci_get_long(pci_dev->config + pci_dev->msi_cap + > + PCI_MSI_ADDRESS_HI); Odd, since we only expose a 32bit MSI capability to the guest... > + msg.data = pci_get_long(pci_dev->config + pci_dev->msi_cap + > + PCI_MSI_DATA_32); Should be pci_get_word() > + > + old = adev->entry; > + new = *old; > + new.u.msi.address_lo = msg.addr_lo; > + new.u.msi.address_hi = msg.addr_hi; > + new.u.msi.data = msg.data; > + > + if (memcmp(old, &new, sizeof(new)) == 0) > + return; > + > + r = kvm_update_routing_entry(old, &new); How does this work? old is now new, so kvm_update_routing_entry() is never going to match to the existing entry if address_lo or data actually change. > + if (r < 0) { > + fprintf(stderr, "%s: kvm_update_msi failed: %s\n", __func__, > + strerror(-r)); > + exit(1); > + } > + > + *old = new; huh? > + r = kvm_irqchip_commit_routes(kvm_state); > + if (r) { > + fprintf(stderr, "%s: kvm_irqchip_commit_routes failed: %s\n", __func__, > + strerror(-r)); > + exit(1); > + } > +} > + > static void assigned_dev_update_msi(PCIDevice *pci_dev) > { > struct kvm_assigned_irq assigned_irq_data; > @@ -1116,6 +1160,14 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev, > uint32_t virt_val = pci_default_read_config(pci_dev, address, len); > uint32_t real_val, emulate_mask, full_emulation_mask; > > + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { > + uint32_t msi_start = pci_dev->msi_cap; > + uint32_t msi_end = msi_start + PCI_MSI_DATA_64 + 3; > + > + if (address >= msi_start && (address + len) < msi_end) ranges_overlap() is meant for this. We only expose a 32bit MSI cap, so msi_end is wrong. > + return virt_val; > + } > + > emulate_mask = 0; > memcpy(&emulate_mask, assigned_dev->emulate_config_read + address, len); > emulate_mask = le32_to_cpu(emulate_mask); > @@ -1130,6 +1182,17 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev, > } > } > > +static void handle_cfg_write_msi(PCIDevice *pci_dev, AssignedDevice *adev) > +{ > + if (!kvm_enabled() || !kvm_irqchip_in_kernel()) > + return; Unnecessary, device assignment doesn't work otherwise. > + > + if (adev->entry && (adev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSI)) Should just be able to test irq_requested_type. > + assigned_dev_update_msi_route(pci_dev); > + else > + assigned_dev_update_msi(pci_dev); > +} > + > static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, > uint32_t val, int len) > { > @@ -1155,9 +1218,13 @@ static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, > } > } > if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { > - if (range_covers_byte(address, len, > - pci_dev->msi_cap + PCI_MSI_FLAGS)) { > - assigned_dev_update_msi(pci_dev); > + uint32_t msi_start = pci_dev->msi_cap; > + uint32_t msi_end = msi_start + PCI_MSI_DATA_64 + 3; > + > + if (address >= msi_start && (address + len) < msi_end) { Use ranges_overlap() please, msi_end is wrong. > + if (address == msi_start + PCI_MSI_DATA_32) > + handle_cfg_write_msi(pci_dev, assigned_dev); Why didn't we just use range_covers_byte(address, len, pci_dev->msi_cap + PCI_MSI_DATA_32) to start with? But how does this handle the enable bit? > + return; > } > } > if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { Thanks, Alex ^ permalink raw reply [flat|nested] 16+ messages in thread
[parent not found: <CAEMbtc+ycsC6u=CZ_Yg6C=WV=VqjA2uEDM5KWPM_7n3sZh_9Pw@mail.gmail.com>]
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting [not found] ` <CAEMbtc+ycsC6u=CZ_Yg6C=WV=VqjA2uEDM5KWPM_7n3sZh_9Pw@mail.gmail.com> @ 2012-05-24 19:27 ` Richard Weinberger 0 siblings, 0 replies; 16+ messages in thread From: Richard Weinberger @ 2012-05-24 19:27 UTC (permalink / raw) To: Alex Williamson; +Cc: avi, tglx, mtosatti, kvm On Thu, 24 May 2012 13:00:51 -0600, Alex Williamson <alex.williamson@redhat.com> wrote: >> How does this work? old is now new, so kvm_update_routing_entry() is >> never going to match to the existing entry if address_lo or data >> actually change. > > Apologies, I read memcpy above No problem. :) I'll address your comments and send v2 tomorrow. Thanks, //richard ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 18:20 ` Alex Williamson [not found] ` <CAEMbtc+ycsC6u=CZ_Yg6C=WV=VqjA2uEDM5KWPM_7n3sZh_9Pw@mail.gmail.com> @ 2012-05-24 21:39 ` Thomas Gleixner 2012-05-24 21:53 ` Jan Kiszka 2012-05-24 22:05 ` Alex Williamson 1 sibling, 2 replies; 16+ messages in thread From: Thomas Gleixner @ 2012-05-24 21:39 UTC (permalink / raw) To: Alex Williamson Cc: Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas On Thu, 24 May 2012, Alex Williamson wrote: > On Thu, 2012-05-24 at 18:02 +0100, Richard Weinberger wrote: > > + if (address == msi_start + PCI_MSI_DATA_32) > > + handle_cfg_write_msi(pci_dev, assigned_dev); > > Why didn't we just use range_covers_byte(address, len, pci_dev->msi_cap > + PCI_MSI_DATA_32) to start with? But how does this handle the enable > bit? The problem with the current implementation is that it only changes the routing if the msi entry goes from masked to unmasked state. Linux does not mask the entries on affinity changes and never did, neither for MSI nor for MSI-X. I know it's probably not according to the spec, but we can't fix that retroactively. Thanks, tglx ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 21:39 ` Thomas Gleixner @ 2012-05-24 21:53 ` Jan Kiszka 2012-05-24 22:11 ` Alex Williamson 2012-05-24 22:17 ` Michael S. Tsirkin 2012-05-24 22:05 ` Alex Williamson 1 sibling, 2 replies; 16+ messages in thread From: Jan Kiszka @ 2012-05-24 21:53 UTC (permalink / raw) To: Thomas Gleixner Cc: Alex Williamson, Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas, Michael S. Tsirkin On 2012-05-24 18:39, Thomas Gleixner wrote: > On Thu, 24 May 2012, Alex Williamson wrote: >> On Thu, 2012-05-24 at 18:02 +0100, Richard Weinberger wrote: >>> + if (address == msi_start + PCI_MSI_DATA_32) >>> + handle_cfg_write_msi(pci_dev, assigned_dev); >> >> Why didn't we just use range_covers_byte(address, len, pci_dev->msi_cap >> + PCI_MSI_DATA_32) to start with? But how does this handle the enable >> bit? > > The problem with the current implementation is that it only changes > the routing if the msi entry goes from masked to unmasked state. > > Linux does not mask the entries on affinity changes and never did, > neither for MSI nor for MSI-X. > > I know it's probably not according to the spec, but we can't fix that > retroactively. For MSI, this is allowed. For MSI-X, this would clearly be a Linux bug, waiting for hardware to dislike this spec violation. However, if this is the current behavior of such a prominent guest, I guess we have to stop optimizing the QEMU MSI-X code that it only updates routings on mask changes. Possibly other OSes get this wrong too... Thanks, for the clarification. Should go into the changelog. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 21:53 ` Jan Kiszka @ 2012-05-24 22:11 ` Alex Williamson 2012-05-24 23:01 ` Thomas Gleixner 2012-05-24 22:17 ` Michael S. Tsirkin 1 sibling, 1 reply; 16+ messages in thread From: Alex Williamson @ 2012-05-24 22:11 UTC (permalink / raw) To: Jan Kiszka Cc: Thomas Gleixner, Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas, Michael S. Tsirkin On Thu, 2012-05-24 at 18:53 -0300, Jan Kiszka wrote: > On 2012-05-24 18:39, Thomas Gleixner wrote: > > On Thu, 24 May 2012, Alex Williamson wrote: > >> On Thu, 2012-05-24 at 18:02 +0100, Richard Weinberger wrote: > >>> + if (address == msi_start + PCI_MSI_DATA_32) > >>> + handle_cfg_write_msi(pci_dev, assigned_dev); > >> > >> Why didn't we just use range_covers_byte(address, len, pci_dev->msi_cap > >> + PCI_MSI_DATA_32) to start with? But how does this handle the enable > >> bit? > > > > The problem with the current implementation is that it only changes > > the routing if the msi entry goes from masked to unmasked state. > > > > Linux does not mask the entries on affinity changes and never did, > > neither for MSI nor for MSI-X. > > > > I know it's probably not according to the spec, but we can't fix that > > retroactively. > > For MSI, this is allowed. For MSI-X, this would clearly be a Linux bug, > waiting for hardware to dislike this spec violation. > > However, if this is the current behavior of such a prominent guest, I > guess we have to stop optimizing the QEMU MSI-X code that it only > updates routings on mask changes. Possibly other OSes get this wrong too... > > Thanks, for the clarification. Should go into the changelog. Hmm, if Linux didn't mask MSIX before updating vectors it'd not only be a spec violation, but my testing of the recent changes to fix MSIX vector updates for exactly this would have failed... } else if (msix_masked(&orig) && !msix_masked(entry)) { ... update vector... So I'm not entirely sure I believe that. Thanks, Alex ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 22:11 ` Alex Williamson @ 2012-05-24 23:01 ` Thomas Gleixner 2012-05-24 23:23 ` Alex Williamson 0 siblings, 1 reply; 16+ messages in thread From: Thomas Gleixner @ 2012-05-24 23:01 UTC (permalink / raw) To: Alex Williamson Cc: Jan Kiszka, Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas, Michael S. Tsirkin On Thu, 24 May 2012, Alex Williamson wrote: > On Thu, 2012-05-24 at 18:53 -0300, Jan Kiszka wrote: > > On 2012-05-24 18:39, Thomas Gleixner wrote: > > > On Thu, 24 May 2012, Alex Williamson wrote: > > >> On Thu, 2012-05-24 at 18:02 +0100, Richard Weinberger wrote: > > >>> + if (address == msi_start + PCI_MSI_DATA_32) > > >>> + handle_cfg_write_msi(pci_dev, assigned_dev); > > >> > > >> Why didn't we just use range_covers_byte(address, len, pci_dev->msi_cap > > >> + PCI_MSI_DATA_32) to start with? But how does this handle the enable > > >> bit? > > > > > > The problem with the current implementation is that it only changes > > > the routing if the msi entry goes from masked to unmasked state. > > > > > > Linux does not mask the entries on affinity changes and never did, > > > neither for MSI nor for MSI-X. > > > > > > I know it's probably not according to the spec, but we can't fix that > > > retroactively. > > > > For MSI, this is allowed. For MSI-X, this would clearly be a Linux bug, > > waiting for hardware to dislike this spec violation. > > > > However, if this is the current behavior of such a prominent guest, I > > guess we have to stop optimizing the QEMU MSI-X code that it only > > updates routings on mask changes. Possibly other OSes get this wrong too... > > > > Thanks, for the clarification. Should go into the changelog. > > Hmm, if Linux didn't mask MSIX before updating vectors it'd not only be > a spec violation, but my testing of the recent changes to fix MSIX > vector updates for exactly this would have failed... > > } else if (msix_masked(&orig) && !msix_masked(entry)) { > ... update vector... > > So I'm not entirely sure I believe that. Thanks, What happens is: A write to /proc/irq/$N/smp_affinity calls into irq_set_affinity() which does: if (irq_can_move_pcntxt(data)) { ret = chip->irq_set_affinity(data, mask, false); } else { irqd_set_move_pending(data); irq_copy_pending(desc, mask); } MSI and MSI-X fall into the !irq_can_move_pcntxt() code path unless the irq is remapped, which is not the case in a guest. That means that we merily copy the new mask and set the move pending bit. MSI/MSI-X use the edge handler so on the next incoming interrupt, we do irq_desc->chip->irq_ack() which ends up in ack_apic_edge() which does: static void ack_apic_edge(struct irq_data *data) { irq_complete_move(data->chip_data); irq_move_irq(data); ack_APIC_irq(); } irq_move_irq() is the interesting function. And that does irq_desc->chip->irq_mask() before calling the irq_set_affinity() function which actually changes the masks. ->irq_mask() ends up in mask_msi_irq(). Now that calls msi_set_mask_bit() and for MSI-X that actually masks the irq. So ignore my MSI-X comment. But for MSI this ends up in msi_mask_irq() which does: if (!desc->msi_attrib.maskbit) return 0; So in case desc->msi_attrib.maskbit is 0 we do not write anything out and then the masked/unmasked logic in qemu fails. Sorry, that I did not decode that down to this level before, but I was in a hurry and assumed correctly that qemu is doing something wrong. Not being familiar with that code did not help either :) So the proper fix is that qemu tells the guest that mask bit is supported and catches the mask bit toggling before writing it out to the hardware for those devices which do not support it. We'll have another look. Thanks, tglx ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 23:01 ` Thomas Gleixner @ 2012-05-24 23:23 ` Alex Williamson 2012-05-24 23:56 ` Thomas Gleixner 0 siblings, 1 reply; 16+ messages in thread From: Alex Williamson @ 2012-05-24 23:23 UTC (permalink / raw) To: Thomas Gleixner Cc: Jan Kiszka, Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas, Michael S. Tsirkin On Fri, 2012-05-25 at 01:01 +0200, Thomas Gleixner wrote: > So the proper fix is that qemu tells the guest that mask bit is > supported and catches the mask bit toggling before writing it out to > the hardware for those devices which do not support it. We can't necessarily do that, we have to work with the config space we're give. Using the smallest possible MSI capability always works. Adding mask bits may not fit in with the existing capabilities of the physical device. Thanks, Alex ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 23:23 ` Alex Williamson @ 2012-05-24 23:56 ` Thomas Gleixner 2012-05-25 2:37 ` Jan Kiszka 0 siblings, 1 reply; 16+ messages in thread From: Thomas Gleixner @ 2012-05-24 23:56 UTC (permalink / raw) To: Alex Williamson Cc: Jan Kiszka, Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas, Michael S. Tsirkin On Thu, 24 May 2012, Alex Williamson wrote: > On Fri, 2012-05-25 at 01:01 +0200, Thomas Gleixner wrote: > > So the proper fix is that qemu tells the guest that mask bit is > > supported and catches the mask bit toggling before writing it out to > > the hardware for those devices which do not support it. > > We can't necessarily do that, we have to work with the config space > we're give. Using the smallest possible MSI capability always works. > Adding mask bits may not fit in with the existing capabilities of the > physical device. Thanks, I see what you mean. A random device driver of a random guest OS might rely on that information. Unlikely, but .... So we need some logic to circumvent the masked/unmasked logic in case that property is not set, right ? Thanks, tglx ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 23:56 ` Thomas Gleixner @ 2012-05-25 2:37 ` Jan Kiszka 0 siblings, 0 replies; 16+ messages in thread From: Jan Kiszka @ 2012-05-25 2:37 UTC (permalink / raw) To: Thomas Gleixner Cc: Alex Williamson, Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas, Michael S. Tsirkin [-- Attachment #1: Type: text/plain, Size: 1475 bytes --] On 2012-05-24 20:56, Thomas Gleixner wrote: > On Thu, 24 May 2012, Alex Williamson wrote: > >> On Fri, 2012-05-25 at 01:01 +0200, Thomas Gleixner wrote: >>> So the proper fix is that qemu tells the guest that mask bit is >>> supported and catches the mask bit toggling before writing it out to >>> the hardware for those devices which do not support it. >> >> We can't necessarily do that, we have to work with the config space >> we're give. Using the smallest possible MSI capability always works. >> Adding mask bits may not fit in with the existing capabilities of the >> physical device. Thanks, > > I see what you mean. A random device driver of a random guest OS might > rely on that information. Unlikely, but .... > > So we need some logic to circumvent the masked/unmasked logic in case > that property is not set, right ? For MSI emulation in QEMU (including device assignment) it is quite simple: don't assume that the guest will always mask or even disable before fiddling with some MSI vector configuration. That is not required by the spec, so we can't rely on it. The patches I have in a semi-finished state will do precisely this. But there is still some use for a dev-assign fix based on the current code for qemu-kvm-1.1. BTW, along with the switch of device assignment to generic MSI support, we should also gain support for MSI vector masking - provided the underlying device comes with that feature as well. Jan [-- Attachment #2: OpenPGP digital signature --] [-- Type: application/pgp-signature, Size: 262 bytes --] ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 21:53 ` Jan Kiszka 2012-05-24 22:11 ` Alex Williamson @ 2012-05-24 22:17 ` Michael S. Tsirkin 2012-05-24 23:06 ` Thomas Gleixner 1 sibling, 1 reply; 16+ messages in thread From: Michael S. Tsirkin @ 2012-05-24 22:17 UTC (permalink / raw) To: Jan Kiszka Cc: Thomas Gleixner, Alex Williamson, Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas On Thu, May 24, 2012 at 06:53:15PM -0300, Jan Kiszka wrote: > On 2012-05-24 18:39, Thomas Gleixner wrote: > > On Thu, 24 May 2012, Alex Williamson wrote: > >> On Thu, 2012-05-24 at 18:02 +0100, Richard Weinberger wrote: > >>> + if (address == msi_start + PCI_MSI_DATA_32) > >>> + handle_cfg_write_msi(pci_dev, assigned_dev); > >> > >> Why didn't we just use range_covers_byte(address, len, pci_dev->msi_cap > >> + PCI_MSI_DATA_32) to start with? But how does this handle the enable > >> bit? > > > > The problem with the current implementation is that it only changes > > the routing if the msi entry goes from masked to unmasked state. > > > > Linux does not mask the entries on affinity changes and never did, > > neither for MSI nor for MSI-X. > > > > I know it's probably not according to the spec, but we can't fix that > > retroactively. > > For MSI, this is allowed. For MSI-X, this would clearly be a Linux bug, > waiting for hardware to dislike this spec violation. > > However, if this is the current behavior of such a prominent guest, I > guess we have to stop optimizing the QEMU MSI-X code that it only > updates routings on mask changes. Possibly other OSes get this wrong too... Very strange, a clear spec violation. I'll have to dig in the source to verify this. > Thanks, for the clarification. Should go into the changelog. > > Jan > > -- > Siemens AG, Corporate Technology, CT T DE IT 1 > Corporate Competence Center Embedded Linux ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 22:17 ` Michael S. Tsirkin @ 2012-05-24 23:06 ` Thomas Gleixner 2012-05-24 23:19 ` Thomas Gleixner 0 siblings, 1 reply; 16+ messages in thread From: Thomas Gleixner @ 2012-05-24 23:06 UTC (permalink / raw) To: Michael S. Tsirkin Cc: Jan Kiszka, Alex Williamson, Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas On Fri, 25 May 2012, Michael S. Tsirkin wrote: > On Thu, May 24, 2012 at 06:53:15PM -0300, Jan Kiszka wrote: > > On 2012-05-24 18:39, Thomas Gleixner wrote: > > > On Thu, 24 May 2012, Alex Williamson wrote: > > >> On Thu, 2012-05-24 at 18:02 +0100, Richard Weinberger wrote: > > >>> + if (address == msi_start + PCI_MSI_DATA_32) > > >>> + handle_cfg_write_msi(pci_dev, assigned_dev); > > >> > > >> Why didn't we just use range_covers_byte(address, len, pci_dev->msi_cap > > >> + PCI_MSI_DATA_32) to start with? But how does this handle the enable > > >> bit? > > > > > > The problem with the current implementation is that it only changes > > > the routing if the msi entry goes from masked to unmasked state. > > > > > > Linux does not mask the entries on affinity changes and never did, > > > neither for MSI nor for MSI-X. > > > > > > I know it's probably not according to the spec, but we can't fix that > > > retroactively. > > > > For MSI, this is allowed. For MSI-X, this would clearly be a Linux bug, > > waiting for hardware to dislike this spec violation. > > > > However, if this is the current behavior of such a prominent guest, I > > guess we have to stop optimizing the QEMU MSI-X code that it only > > updates routings on mask changes. Possibly other OSes get this wrong too... > > Very strange, a clear spec violation. I'll have to dig in the source to > verify this. Stop digging. MSI-X is correct. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 23:06 ` Thomas Gleixner @ 2012-05-24 23:19 ` Thomas Gleixner 0 siblings, 0 replies; 16+ messages in thread From: Thomas Gleixner @ 2012-05-24 23:19 UTC (permalink / raw) To: Michael S. Tsirkin Cc: Jan Kiszka, Alex Williamson, Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas On Fri, 25 May 2012, Thomas Gleixner wrote: > On Fri, 25 May 2012, Michael S. Tsirkin wrote: > > On Thu, May 24, 2012 at 06:53:15PM -0300, Jan Kiszka wrote: > > > On 2012-05-24 18:39, Thomas Gleixner wrote: > > > > On Thu, 24 May 2012, Alex Williamson wrote: > > > >> On Thu, 2012-05-24 at 18:02 +0100, Richard Weinberger wrote: > > > >>> + if (address == msi_start + PCI_MSI_DATA_32) > > > >>> + handle_cfg_write_msi(pci_dev, assigned_dev); > > > >> > > > >> Why didn't we just use range_covers_byte(address, len, pci_dev->msi_cap > > > >> + PCI_MSI_DATA_32) to start with? But how does this handle the enable > > > >> bit? > > > > > > > > The problem with the current implementation is that it only changes > > > > the routing if the msi entry goes from masked to unmasked state. > > > > > > > > Linux does not mask the entries on affinity changes and never did, > > > > neither for MSI nor for MSI-X. > > > > > > > > I know it's probably not according to the spec, but we can't fix that > > > > retroactively. > > > > > > For MSI, this is allowed. For MSI-X, this would clearly be a Linux bug, > > > waiting for hardware to dislike this spec violation. > > > > > > However, if this is the current behavior of such a prominent guest, I > > > guess we have to stop optimizing the QEMU MSI-X code that it only > > > updates routings on mask changes. Possibly other OSes get this wrong too... > > > > Very strange, a clear spec violation. I'll have to dig in the source to > > verify this. > > Stop digging. MSI-X is correct. This was based off an older version of qemu-kvm, where the routing for MSI-X was broken for other reasons. But that seems to be fixed now. I use the age excuse :) Thanks, tglx ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 21:39 ` Thomas Gleixner 2012-05-24 21:53 ` Jan Kiszka @ 2012-05-24 22:05 ` Alex Williamson 1 sibling, 0 replies; 16+ messages in thread From: Alex Williamson @ 2012-05-24 22:05 UTC (permalink / raw) To: Thomas Gleixner Cc: Richard Weinberger, kvm, avi, Marcelo Tosatti, Bjorn Helgaas On Thu, 2012-05-24 at 23:39 +0200, Thomas Gleixner wrote: > On Thu, 24 May 2012, Alex Williamson wrote: > > On Thu, 2012-05-24 at 18:02 +0100, Richard Weinberger wrote: > > > + if (address == msi_start + PCI_MSI_DATA_32) > > > + handle_cfg_write_msi(pci_dev, assigned_dev); > > > > Why didn't we just use range_covers_byte(address, len, pci_dev->msi_cap > > + PCI_MSI_DATA_32) to start with? But how does this handle the enable > > bit? > > The problem with the current implementation is that it only changes > the routing if the msi entry goes from masked to unmasked state. We don't expose a maskable MSI capability to the guest, so I think you mean enable/disable. > Linux does not mask the entries on affinity changes and never did, > neither for MSI nor for MSI-X. > > I know it's probably not according to the spec, but we can't fix that > retroactively. We need to do both then, enable MSI based on the enable bit and update routing based on address updates. It seems like this code is counting on data being written after the enable bit is set, which is not guaranteed to happen. Thanks, Alex ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting 2012-05-24 17:02 ` [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting Richard Weinberger 2012-05-24 18:20 ` Alex Williamson @ 2012-05-24 20:47 ` Jan Kiszka 1 sibling, 0 replies; 16+ messages in thread From: Jan Kiszka @ 2012-05-24 20:47 UTC (permalink / raw) To: Richard Weinberger; +Cc: kvm, avi, mtosatti, tglx On 2012-05-24 14:02, Richard Weinberger wrote: > MSI interrupt affinity setting on the guest ended always up on vcpu0, > no matter what. > IOW writes to /proc/irq/<IRQ>/smp_affinity are irgnored. > This patch fixes the MSI IRQ routing and avoids the utter madness of > tearing down and setting up the interrupt completely when this changes. The device assignment code will soon be significantly refactored in this regard (MSI/MSI-X handling will use generic QEMU services instead of open-coding their own bugs). Also for this reason, it would be very good to explain in the commit log what was broken or missing so that affinities were not respected. Thanks, Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux ^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2012-05-25 2:37 UTC | newest]
Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-05-24 17:02 [PATCH 1/2] Remove kvm_commit_irq_routes from error messages Richard Weinberger
2012-05-24 17:02 ` [PATCH 2/2] Device assignment: Fix MSI IRQ affinity setting Richard Weinberger
2012-05-24 18:20 ` Alex Williamson
[not found] ` <CAEMbtc+ycsC6u=CZ_Yg6C=WV=VqjA2uEDM5KWPM_7n3sZh_9Pw@mail.gmail.com>
2012-05-24 19:27 ` Richard Weinberger
2012-05-24 21:39 ` Thomas Gleixner
2012-05-24 21:53 ` Jan Kiszka
2012-05-24 22:11 ` Alex Williamson
2012-05-24 23:01 ` Thomas Gleixner
2012-05-24 23:23 ` Alex Williamson
2012-05-24 23:56 ` Thomas Gleixner
2012-05-25 2:37 ` Jan Kiszka
2012-05-24 22:17 ` Michael S. Tsirkin
2012-05-24 23:06 ` Thomas Gleixner
2012-05-24 23:19 ` Thomas Gleixner
2012-05-24 22:05 ` Alex Williamson
2012-05-24 20:47 ` Jan Kiszka
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox