From: Andrew Cooper
Subject: Re: [PATCH resend 2/3] x86: enable multi-vector MSI
Date: Tue, 16 Jul 2013 12:36:38 +0100
Message-ID: <51E53046.1040809@citrix.com>
In-Reply-To: <51E5392502000078000E54AE@nat28.tlf.novell.com>
References: <51E535F902000078000E547F@nat28.tlf.novell.com> <51E5392502000078000E54AE@nat28.tlf.novell.com>
To: Jan Beulich
Cc: Keir Fraser, xiantao.zhang@intel.com, xen-devel
List-Id: xen-devel@lists.xenproject.org

On 16/07/13 11:14, Jan Beulich wrote:
This implies
- extending the public interface to have a way to request a block of
  MSIs
- allocating a block of contiguous pIRQ-s for the target domain (but
  note that the Xen IRQs allocated have no need of being contiguous)
- repeating certain operations for all involved IRQs
- fixing multi_msi_enable()
- adjusting the mask bit accesses for maskable MSIs

Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -1863,6 +1863,25 @@ int get_free_pirq(struct domain *d, int 
     return -ENOSPC;
 }
 
+int get_free_pirqs(struct domain *d, unsigned int nr)
+{
+    unsigned int i, found = 0;
+
+    ASSERT(spin_is_locked(&d->event_lock));
+
+    for ( i = d->nr_pirqs - 1; i >= nr_irqs_gsi; --i )
+        if ( is_free_pirq(d, pirq_info(d, i)) )
+        {
+            pirq_get_info(d, i);
+            if ( ++found == nr )
+                return i;
+        }
+        else
+            found = 0;
+
+    return -ENOSPC;
+}
+

Is there any reason why this loop runs backwards? Unless I am mistaken, guests can choose their own pirqs when binding them, which reduces the likelihood that the top of the available space will be free.
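
Purely for illustration (untested), the equivalent forward scan would be something like the sketch below, using the same helpers and keeping the same behaviour on a partial run; the only real difference is that it has to return the base of the block rather than finishing on it:

int get_free_pirqs(struct domain *d, unsigned int nr)
{
    unsigned int i, found = 0;

    ASSERT(spin_is_locked(&d->event_lock));

    /* Scan upwards from the bottom of the non-GSI pirq space. */
    for ( i = nr_irqs_gsi; i < d->nr_pirqs; ++i )
        if ( is_free_pirq(d, pirq_info(d, i)) )
        {
            pirq_get_info(d, i);
            if ( ++found == nr )
                return i - nr + 1;  /* first pirq of the contiguous block */
        }
        else
            found = 0;

    return -ENOSPC;
}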

 int map_domain_pirq(
     struct domain *d, int pirq, int irq, int type, void *data)
 {
@@ -1918,11 +1937,12 @@ int map_domain_pirq(
 
     desc = irq_to_desc(irq);
 
-    if ( type == MAP_PIRQ_TYPE_MSI )
+    if ( type == MAP_PIRQ_TYPE_MSI || type == MAP_PIRQ_TYPE_MULTI_MSI )
     {
         struct msi_info *msi = (struct msi_info *)data;
         struct msi_desc *msi_desc;
         struct pci_dev *pdev;
+        unsigned int nr = 0;
 
         ASSERT(spin_is_locked(&pcidevs_lock));
 
@@ -1933,7 +1953,14 @@ int map_domain_pirq(
         pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
         ret = pci_enable_msi(msi, &msi_desc);
         if ( ret )
+        {
+            if ( ret > 0 )
+            {
+                msi->entry_nr = ret;
+                ret = -ENFILE;
+            }
             goto done;
+        }
 
         spin_lock_irqsave(&desc->lock, flags);
 
@@ -1947,25 +1974,73 @@ int map_domain_pirq(
             goto done;
         }
 
-        ret = setup_msi_irq(desc, msi_desc);
-        if ( ret )
+        while ( !(ret = setup_msi_irq(desc, msi_desc + nr)) )
         {
+            if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV &&
+                 !desc->arch.used_vectors )
+            {
+                desc->arch.used_vectors = &pdev->arch.used_vectors;
+                if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+                {
+                    int vector = desc->arch.vector;
+
+                    ASSERT(!test_bit(vector, desc->arch.used_vectors));
+                    set_bit(vector, desc->arch.used_vectors);
+                }
+            }
+            if ( type == MAP_PIRQ_TYPE_MSI ||
+                 msi_desc->msi_attrib.type != PCI_CAP_ID_MSI ||
+                 ++nr == msi->entry_nr )
+                break;
+
+            set_domain_irq_pirq(d, irq, info);
             spin_unlock_irqrestore(&desc->lock, flags);
-            pci_disable_msi(msi_desc);
-            goto done;
+
+            info = NULL;
+            irq = create_irq(NUMA_NO_NODE);
+            ret = irq >= 0 ? prepare_domain_irq_pirq(d, irq, pirq + nr, &info)
+                           : irq;
+            if ( ret )
+                break;
+            msi_desc[nr].irq = irq;
+
+            if ( irq_permit_access(d, irq) != 0 )
+                printk(XENLOG_G_WARNING
+                       "dom%d: could not permit access to IRQ%d (pirq %d)\n",
+                       d->domain_id, irq, pirq);
+
+            desc = irq_to_desc(irq);
+            spin_lock_irqsave(&desc->lock, flags);
+
+            if ( desc->handler != &no_irq_type )
+            {
+                dprintk(XENLOG_G_ERR, "dom%d: irq %d (pirq %u) in use (%s)\n",
+                        d->domain_id, irq, pirq + nr, desc->handler->typename);
+                ret = -EBUSY;
+                break;
+            }
         }
 
-        if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
-             && !desc->arch.used_vectors )
+        if ( ret )
         {
-            desc->arch.used_vectors = &pdev->arch.used_vectors;
-            if ( desc->arch.vector != IRQ_VECTOR_UNASSIGNED )
+            spin_unlock_irqrestore(&desc->lock, flags);
+            while ( nr-- )
             {
-                int vector = desc->arch.vector;
-                ASSERT(!test_bit(vector, desc->arch.used_vectors));
-
-                set_bit(vector, desc->arch.used_vectors);
+                if ( irq >= 0 )
+                {
+                    if ( irq_deny_access(d, irq) )
+                        printk(XENLOG_G_ERR
+                               "dom%d: could not revoke access to IRQ%d (pirq %d)\n",
+                               d->domain_id, irq, pirq);
+                    destroy_irq(irq);
+                }
+                if ( info )
+                    cleanup_domain_irq_pirq(d, irq, info);
+                info = pirq_info(d, pirq + nr);
+                irq = info->arch.irq;
             }
+            pci_disable_msi(msi_desc);
+            goto done;
         }
 
         set_domain_irq_pirq(d, irq, info);
@@ -1996,7 +2071,8 @@ int unmap_domain_pirq(struct domain *d, 
 {
     unsigned long flags;
     struct irq_desc *desc;
-    int irq, ret = 0;
+    int irq, ret = 0, rc;
+    unsigned int i, nr = 1;
     bool_t forced_unbind;
     struct pirq *info;
     struct msi_desc *msi_desc = NULL;
@@ -2018,6 +2094,18 @@ int unmap_domain_pirq(struct domain *d, 
 
     desc = irq_to_desc(irq);
     msi_desc = desc->msi_desc;
+    if ( msi_desc && msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+    {
+        if ( msi_desc->msi_attrib.entry_nr )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: trying to unmap secondary MSI pirq %d\n",
+                   d->domain_id, pirq);
+            ret = -EBUSY;
+            goto done;
+        }
+        nr = msi_desc->msi.nvec;
+    }
 
     ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, msi_desc);
     if ( ret )
@@ -2033,37 +2121,83 @@ int unmap_domain_pirq(struct domain *d, 
 
     spin_lock_irqsave(&desc->lock, flags);
 
-    BUG_ON(irq != domain_pirq_to_irq(d, pirq));
-
-    if ( !forced_unbind )
-        clear_domain_irq_pirq(d, irq, info);
-    else
+    for ( i = 0; ; )
     {
-        info->arch.irq = -irq;
-        radix_tree_replace_slot(
-            radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
-            radix_tree_int_to_ptr(-pirq));
+        BUG_ON(irq != domain_pirq_to_irq(d, pirq + i));
+
+        if ( !forced_unbind )
+            clear_domain_irq_pirq(d, irq, info);
+        else
+        {
+            info->arch.irq = -irq;
+            radix_tree_replace_slot(
+                radix_tree_lookup_slot(&d->arch.irq_pirq, irq),
+                radix_tree_int_to_ptr(-pirq));
+        }
+
+        if ( msi_desc )
+        {
+            desc->handler = &no_irq_type;
+            desc->msi_desc = NULL;
+        }
+
+        if ( ++i == nr )
+            break;
+
+        spin_unlock_irqrestore(&desc->lock, flags);
+
+        if ( !forced_unbind )
+           cleanup_domain_irq_pirq(d, irq, info);
+
+        rc = irq_deny_access(d, irq);
+        if ( rc )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+                   d->domain_id, irq, pirq + i);
+            ret = rc;
+        }
+
+        do {
+            info = pirq_info(d, pirq + i);
+            if ( info && (irq = info->arch.irq) > 0 )
+                break;
+            printk(XENLOG_G_ERR "dom%d: MSI pirq %d not mapped\n",
+                   d->domain_id, pirq + i);
+        } while ( ++i < nr );
+
+        if ( i == nr )
+        {
+            desc = NULL;
+            break;
+        }
+
+        desc = irq_to_desc(irq);
+        BUG_ON(desc->msi_desc != msi_desc + i);
+
+        spin_lock_irqsave(&desc->lock, flags);
     }
 
-    if ( msi_desc )
+    if ( desc )
     {
-        desc->handler = &no_irq_type;
-        desc->msi_desc = NULL;
+        spin_unlock_irqrestore(&desc->lock, flags);
+
+        if ( !forced_unbind )
+            cleanup_domain_irq_pirq(d, irq, info);
+
+        rc = irq_deny_access(d, irq);
+        if ( rc )
+        {
+            printk(XENLOG_G_ERR
+                   "dom%d: could not deny access to IRQ%d (pirq %d)\n",
+                   d->domain_id, irq, pirq + nr - 1);
+            ret = rc;
+        }
     }
 
-    spin_unlock_irqrestore(&desc->lock, flags);
     if (msi_desc)
         msi_free_irq(msi_desc);
 
-    if ( !forced_unbind )
-        cleanup_domain_irq_pirq(d, irq, info);
-
-    ret = irq_deny_access(d, irq);
-    if ( ret )
-        printk(XENLOG_G_ERR
-               "dom%d: could not deny access to IRQ%d (pirq %d)\n",
-               d->domain_id, irq, pirq);
-
  done:
     return ret;
 }
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -236,6 +236,11 @@ static int write_msi_msg(struct msi_desc
         u8 bus = dev->bus;
         u8 slot = PCI_SLOT(dev->devfn);
         u8 func = PCI_FUNC(dev->devfn);
+        int nr = entry->msi_attrib.entry_nr;
+
+        ASSERT((msg->data & (entry[-nr].msi.nvec - 1)) == nr);
+        if ( nr )
+            return 0;
 
         pci_conf_write32(seg, bus, slot, func, msi_lower_address_reg(pos),
                          msg->address_lo);
@@ -359,8 +364,8 @@ static void msi_set_mask_bit(struct irq_
             u8 func = PCI_FUNC(entry->dev->devfn);
 
             mask_bits = pci_conf_read32(seg, bus, slot, func, entry->msi.mpos);
-            mask_bits &= ~(1);
-            mask_bits |= flag;
+            mask_bits &= ~((u32)1 << entry->msi_attrib.entry_nr);
+            mask_bits |= (u32)flag << entry->msi_attrib.entry_nr;
             pci_conf_write32(seg, bus, slot, func, entry->msi.mpos, mask_bits);
         }
         break;
@@ -384,10 +389,11 @@ static int msi_get_mask_bit(const struct
     case PCI_CAP_ID_MSI:
         if (!entry->dev || !entry->msi_attrib.maskbit)
             break;
-        return pci_conf_read32(entry->dev->seg, entry->dev->bus,
-                               PCI_SLOT(entry->dev->devfn),
-                               PCI_FUNC(entry->dev->devfn),
-                               entry->msi.mpos) & 1;
+        return (pci_conf_read32(entry->dev->seg, entry->dev->bus,
+                                PCI_SLOT(entry->dev->devfn),
+                                PCI_FUNC(entry->dev->devfn),
+                                entry->msi.mpos) >>
+                entry->msi_attrib.entry_nr) & 1;
     case PCI_CAP_ID_MSIX:
         return readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) & 1;
     }
@@ -453,17 +459,20 @@ static hw_irq_controller pci_msi_nonmask
     .set_affinity = set_msi_affinity
 };
 
-static struct msi_desc* alloc_msi_entry(void)
+static struct msi_desc *alloc_msi_entry(unsigned int nr)
 {
     struct msi_desc *entry;
 
-    entry = xmalloc(struct msi_desc);
+    entry = xmalloc_array(struct msi_desc, nr);
     if ( !entry )
         return NULL;
 
     INIT_LIST_HEAD(&entry->list);
-    entry->dev = NULL;
-    entry->remap_index = -1;
+    while ( nr-- )
+    {
+        entry[nr].dev = NULL;
+        entry[nr].remap_index = -1;
+    }
 
     return entry;
 }
@@ -488,17 +497,24 @@ int __setup_msi_irq(struct irq_desc *des
 
 int msi_free_irq(struct msi_desc *entry)
 {
-    destroy_irq(entry->irq);
+    unsigned int nr = entry->msi.nvec;
+
     if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
     {
         unsigned long start;
         start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
         msix_put_fixmap(entry->dev, virt_to_fix(start));
+        nr = 1;
     }
 
-    /* Free the unused IRTE if intr remap enabled */
-    if ( iommu_intremap )
-        iommu_update_ire_from_msi(entry, NULL);
+    while ( nr-- )
+    {
+        destroy_irq(entry[nr].irq);
+
+        /* Free the unused IRTE if intr remap enabled */
+        if ( iommu_intremap )
+            iommu_update_ire_from_msi(entry + nr, NULL);
+    }
 
     list_del(&entry->list);
     xfree(entry);
@@ -531,11 +547,12 @@ static struct msi_desc *find_msi_entry(s
  **/
 static int msi_capability_init(struct pci_dev *dev,
                                int irq,
-                               struct msi_desc **desc)
+                               struct msi_desc **desc,
+                               unsigned int nvec)
 {
     struct msi_desc *entry;
     int pos;
-    unsigned int maxvec, mpos;
+    unsigned int i, maxvec, mpos;
     u16 control, seg = dev->seg;
     u8 bus = dev->bus;
     u8 slot = PCI_SLOT(dev->devfn);
@@ -545,27 +562,34 @@ static int msi_capability_init(struct pc
     pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
     control = pci_conf_read16(seg, bus, slot, func, msi_control_reg(pos));
     maxvec = multi_msi_capable(control);
+    if ( nvec > maxvec )
+        return maxvec;
     control &= ~PCI_MSI_FLAGS_QSIZE;
+    multi_msi_enable(control, nvec);
 
     /* MSI Entry Initialization */
     msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
 
-    entry = alloc_msi_entry();
+    entry = alloc_msi_entry(nvec);
     if ( !entry )
         return -ENOMEM;
 
-    entry->msi_attrib.type = PCI_CAP_ID_MSI;
-    entry->msi_attrib.is_64 = is_64bit_address(control);
-    entry->msi_attrib.entry_nr = 0;
-    entry->msi_attrib.maskbit = is_mask_bit_support(control);
-    entry->msi_attrib.masked = 1;
-    entry->msi_attrib.pos = pos;
     mpos = msi_mask_bits_reg(pos, is_64bit_address(control));
-    entry->msi.nvec = 1;
+    for ( i = 0; i < nvec; ++i )
+    {
+        entry[i].msi_attrib.type = PCI_CAP_ID_MSI;
+        entry[i].msi_attrib.is_64 = is_64bit_address(control);
+        entry[i].msi_attrib.entry_nr = i;
+        entry[i].msi_attrib.maskbit = is_mask_bit_support(control);
+        entry[i].msi_attrib.masked = 1;
+        entry[i].msi_attrib.pos = pos;
+        if ( entry[i].msi_attrib.maskbit )
+            entry[i].msi.mpos = mpos;
+        entry[i].msi.nvec = 0;
+        entry[i].dev = dev;
+    }
+    entry->msi.nvec = nvec;
     entry->irq = irq;
-    if ( is_mask_bit_support(control) )
-        entry->msi.mpos = mpos;
-    entry->dev = dev;
     if ( entry->msi_attrib.maskbit )
     {
         u32 maskbits;
@@ -693,7 +717,7 @@ static int msix_capability_init(struct p
 
     if ( desc )
     {
-        entry = alloc_msi_entry();
+        entry = alloc_msi_entry(1);
         if ( !entry )
             return -ENOMEM;
         ASSERT(msi);
@@ -851,7 +875,6 @@ static int msix_capability_init(struct p
 
 static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
 {
-    int status;
     struct pci_dev *pdev;
     struct msi_desc *old_desc;
 
@@ -880,8 +903,7 @@ static int __pci_enable_msi(struct msi_i
         pci_disable_msi(old_desc);
     }
 
-    status = msi_capability_init(pdev, msi->irq, desc);
-    return status;
+    return msi_capability_init(pdev, msi->irq, desc, msi->entry_nr);
 }
 
 static void __pci_disable_msi(struct msi_desc *entry)
@@ -1101,6 +1123,8 @@ int pci_restore_msi_state(struct pci_dev
 
     list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
     {
+        unsigned int i = 0, nr = 1;
+
         irq = entry->irq;
         desc = &irq_desc[irq];
 
@@ -1110,30 +1134,58 @@ int pci_restore_msi_state(struct pci_dev
 
         if (desc->msi_desc != entry)
         {
+    bogus:
             dprintk(XENLOG_ERR,
-                    "Restore MSI for dev %04x:%02x:%02x:%x not set before?\n",
+                    "Restore MSI for %04x:%02x:%02x:%u entry %u not set?\n",
                     pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
-                    PCI_FUNC(pdev->devfn));
+                    PCI_FUNC(pdev->devfn), i);
             spin_unlock_irqrestore(&desc->lock, flags);
             return -EINVAL;
         }
 
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+        {
             msi_set_enable(pdev, 0);
+            nr = entry->msi.nvec;
+        }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
             msix_set_enable(pdev, 0);
 
         msg = entry->msg;
         write_msi_msg(entry, &msg);
 
-        msi_set_mask_bit(desc, entry->msi_attrib.masked);
+        for ( i = 0; ; )
+        {
+            msi_set_mask_bit(desc, entry[i].msi_attrib.masked);
+            spin_unlock_irqrestore(&desc->lock, flags);
+
+            if ( !--nr )
+                break;
+
+            desc = &irq_desc[entry[++i].irq];
+            spin_lock_irqsave(&desc->lock, flags);
+            if ( desc->msi_desc != entry + i )
+                goto bogus;
+        }
+
+        spin_unlock_irqrestore(&desc->lock, flags);
 
         if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+        {
+            unsigned int cpos = msi_control_reg(entry->msi_attrib.pos);
+            u16 control = pci_conf_read16(pdev->seg, pdev->bus,
+                                          PCI_SLOT(pdev->devfn),
+                                          PCI_FUNC(pdev->devfn), cpos);
+
+            control &= ~PCI_MSI_FLAGS_QSIZE;
+            multi_msi_enable(control, entry->msi.nvec);
+            pci_conf_write16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
+                             PCI_FUNC(pdev->devfn), cpos, control);
+
             msi_set_enable(pdev, 1);
+        }
         else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
             msix_set_enable(pdev, 1);
-
-        spin_unlock_irqrestore(&desc->lock, flags);
     }
 
     return 0;
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -140,8 +140,11 @@ int physdev_map_pirq(domid_t domid, int 
         break;
 
     case MAP_PIRQ_TYPE_MSI:
+        if ( !msi->table_base )
+            msi->entry_nr = 1;
         irq = *index;
         if ( irq == -1 )
+    case MAP_PIRQ_TYPE_MULTI_MSI:
             irq = create_irq(NUMA_NO_NODE);
 
         if ( irq < nr_irqs_gsi || irq >= nr_irqs )
@@ -179,6 +182,30 @@ int physdev_map_pirq(domid_t domid, int 
                 goto done;
             }
         }
+        else if ( type == MAP_PIRQ_TYPE_MULTI_MSI )
+        {
+            if ( msi->entry_nr <= 0 || msi->entry_nr > 32 )
+                ret = -EDOM;
+            else if ( msi->entry_nr != 1 && !iommu_intremap )
+                ret = -EOPNOTSUPP;
+            else
+            {
+                while ( msi->entry_nr & (msi->entry_nr - 1) )
+                    msi->entry_nr += msi->entry_nr & -msi->entry_nr;
+                pirq = get_free_pirqs(d, msi->entry_nr);
+                if ( pirq < 0 )
+                {
+                    while ( (msi->entry_nr >>= 1) > 1 )
+                        if ( get_free_pirqs(d, msi->entry_nr) > 0 )
+                            break;
+                    dprintk(XENLOG_G_ERR, "dom%d: no block of %d free pirqs\n",
+                            d->domain_id, msi->entry_nr << 1);
+                    ret = pirq;
+                }
+            }
+            if ( ret < 0 )
+                goto done;
+        }
         else
         {
             pirq = get_free_pirq(d, type);
@@ -210,8 +237,15 @@ int physdev_map_pirq(domid_t domid, int 
  done:
     spin_unlock(&d->event_lock);
     spin_unlock(&pcidevs_lock);
-    if ( (ret != 0) && (type == MAP_PIRQ_TYPE_MSI) && (*index == -1) )
-        destroy_irq(irq);
+    if ( ret != 0 )
+        switch ( type )
+        {
+        case MAP_PIRQ_TYPE_MSI:
+            if ( *index == -1 )
+        case MAP_PIRQ_TYPE_MULTI_MSI:
+                destroy_irq(irq);
+            break;
+        }

Do we not need to create and destroy entry_nr irqs in this function, or is a multi-vector MSI now considered to be just a single irq?

I ask because this appears to lack the "repeating certain operations for all involved IRQs" described in the patch description above.
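
Purely to illustrate what I was expecting rather than to suggest actual code, something very roughly along these lines in the error path (ignoring locking, and assuming the extra irqs have already been recorded in the pirq_info entries by that point):

    if ( ret != 0 && type == MAP_PIRQ_TYPE_MULTI_MSI )
    {
        unsigned int i;

        /* Tear down every irq backing the pirq block, not just the first. */
        for ( i = 0; i < msi->entry_nr; ++i )
        {
            const struct pirq *info = pirq_info(d, pirq + i);

            if ( info && info->arch.irq > 0 )
                destroy_irq(info->arch.irq);
        }
    }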

~Andrew

  free_domain:
     rcu_unlock_domain(d);
     return ret;
@@ -390,14 +424,22 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         if ( copy_from_guest(&map, arg, 1) != 0 )
             break;
 
-        if ( map.type == MAP_PIRQ_TYPE_MSI_SEG )
+        switch ( map.type )
         {
+        case MAP_PIRQ_TYPE_MSI_SEG:
             map.type = MAP_PIRQ_TYPE_MSI;
             msi.seg = map.bus >> 16;
-        }
-        else
-        {
+            break;
+
+        case MAP_PIRQ_TYPE_MULTI_MSI:
+            if ( map.table_base )
+                return -EINVAL;
+            msi.seg = map.bus >> 16;
+            break;
+
+        default:
             msi.seg = 0;
+            break;
         }
         msi.bus = map.bus;
         msi.devfn = map.devfn;
@@ -406,6 +448,8 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         ret = physdev_map_pirq(map.domid, map.type, &map.index, &map.pirq,
                                &msi);
 
+        if ( map.type == MAP_PIRQ_TYPE_MULTI_MSI )
+            map.entry_nr = msi.entry_nr;
         if ( __copy_to_guest(arg, &map, 1) )
             ret = -EFAULT;
         break;
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -141,6 +141,7 @@ int map_domain_pirq(struct domain *d, in
                            void *data);
 int unmap_domain_pirq(struct domain *d, int pirq);
 int get_free_pirq(struct domain *d, int type);
+int get_free_pirqs(struct domain *, unsigned int nr);
 void free_domain_pirqs(struct domain *d);
 int map_domain_emuirq_pirq(struct domain *d, int pirq, int irq);
 int unmap_domain_pirq_emuirq(struct domain *d, int pirq);
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -148,7 +148,7 @@ int msi_free_irq(struct msi_desc *entry)
 #define multi_msi_capable(control) \
 	(1 << ((control & PCI_MSI_FLAGS_QMASK) >> 1))
 #define multi_msi_enable(control, num) \
-	control |= (((num >> 1) << 4) & PCI_MSI_FLAGS_QSIZE);
+	control |= (((fls(num) - 1) << 4) & PCI_MSI_FLAGS_QSIZE);
 #define is_64bit_address(control)	(!!(control & PCI_MSI_FLAGS_64BIT))
 #define is_mask_bit_support(control)	(!!(control & PCI_MSI_FLAGS_MASKBIT))
 #define msi_enable(control, num) multi_msi_enable(control, num); \
--- a/xen/include/public/physdev.h
+++ b/xen/include/public/physdev.h
@@ -151,21 +151,22 @@ DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
 #define MAP_PIRQ_TYPE_GSI               0x1
 #define MAP_PIRQ_TYPE_UNKNOWN           0x2
 #define MAP_PIRQ_TYPE_MSI_SEG           0x3
+#define MAP_PIRQ_TYPE_MULTI_MSI         0x4
 
 #define PHYSDEVOP_map_pirq               13
 struct physdev_map_pirq {
     domid_t domid;
     /* IN */
     int type;
-    /* IN */
+    /* IN (ignored for ..._MULTI_MSI) */
     int index;
     /* IN or OUT */
     int pirq;
-    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
+    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
     int bus;
     /* IN */
     int devfn;
-    /* IN */
+    /* IN (also OUT for ..._MULTI_MSI) */
     int entry_nr;
     /* IN */
     uint64_t table_base;



