linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: linux-pci@vger.kernel.org
Cc: agordeev@redhat.com, linux-kernel@vger.kernel.org
Subject: [PATCH 5/5] pci: spread interrupt vectors in pci_alloc_irq_vectors
Date: Tue, 12 Jul 2016 18:20:18 +0900	[thread overview]
Message-ID: <1468315218-20490-6-git-send-email-hch@lst.de> (raw)
In-Reply-To: <1468315218-20490-1-git-send-email-hch@lst.de>

Set the affinity_mask in the PCI device before allocating vectors so that
the affinity can be propagated through the MSI descriptor structures to
the core IRQ code.  To facilitate this, new __pci_enable_msi_range and
__pci_enable_msix_range helpers are factored out of their non-prefixed
variants; the helpers assign the new irq affinity mask in the PCI device
so that the low-level interrupt code can perform the interrupt affinity
assignment and do node-local allocations.

A new PCI_IRQ_NOAFFINITY flag is added to pci_alloc_irq_vectors so that
this function can also be used by drivers that don't wish to use the
automatic affinity assignment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/PCI/MSI-HOWTO.txt |   3 +
 drivers/pci/msi.c               | 127 ++++++++++++++++++++++++++--------------
 include/linux/pci.h             |   2 +
 3 files changed, 89 insertions(+), 43 deletions(-)

diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 0af91e8..16e9187 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -97,6 +97,9 @@ The flags argument should normally be set to 0, but can be used to pass the
 PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support
 MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in
 case the device does not support legacy interrupt lines.
+By default this function will spread the interrupts around the available
+CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY
+flag.
 
 To get the Linux IRQ numbers passed to request_irq and free_irq
 and the vectors use the following function:
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 00657bf..692deff 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -569,6 +569,7 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
 	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
 	entry->msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
 	entry->nvec_used		= nvec;
+	entry->affinity			= dev->irq_affinity;
 
 	if (control & PCI_MSI_FLAGS_64BIT)
 		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
@@ -680,10 +681,18 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
 static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 			      struct msix_entry *entries, int nvec)
 {
+	const struct cpumask *mask = NULL;
 	struct msi_desc *entry;
-	int i;
+	int cpu = -1, i;
 
 	for (i = 0; i < nvec; i++) {
+		if (dev->irq_affinity) {
+			cpu = cpumask_next(cpu, dev->irq_affinity);
+			if (cpu >= nr_cpu_ids)
+				cpu = cpumask_first(dev->irq_affinity);
+			mask = cpumask_of(cpu);
+		}
+
 		entry = alloc_msi_entry(&dev->dev);
 		if (!entry) {
 			if (!i)
@@ -703,6 +712,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 		entry->msi_attrib.default_irq	= dev->irq;
 		entry->mask_base		= base;
 		entry->nvec_used		= 1;
+		entry->affinity			= mask;
 
 		list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
 	}
@@ -1028,19 +1038,8 @@ int pci_msi_enabled(void)
 }
 EXPORT_SYMBOL(pci_msi_enabled);
 
-/**
- * pci_enable_msi_range - configure device's MSI capability structure
- * @dev: device to configure
- * @minvec: minimal number of interrupts to configure
- * @maxvec: maximum number of interrupts to configure
- *
- * This function tries to allocate a maximum possible number of interrupts in a
- * range between @minvec and @maxvec. It returns a negative errno if an error
- * occurs. If it succeeds, it returns the actual number of interrupts allocated
- * and updates the @dev's irq member to the lowest new interrupt number;
- * the other interrupt numbers allocated to this device are consecutive.
- **/
-int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
+static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
+		unsigned int flags)
 {
 	int nvec;
 	int rc;
@@ -1068,20 +1067,77 @@ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
 	else if (nvec > maxvec)
 		nvec = maxvec;
 
-	do {
+	for (;;) {
+		if (!(flags & PCI_IRQ_NOAFFINITY)) {
+			dev->irq_affinity = irq_create_affinity_mask(&nvec);
+			if (nvec < minvec)
+				return -ENOSPC;
+		}
+
 		rc = msi_capability_init(dev, nvec);
-		if (rc < 0) {
+		if (rc == 0)
+			return nvec;
+
+		kfree(dev->irq_affinity);
+		dev->irq_affinity = NULL;
+
+		if (rc < 0)
 			return rc;
-		} else if (rc > 0) {
-			if (rc < minvec)
+		if (rc < minvec)
+			return -ENOSPC;
+		nvec = rc;
+	}
+}
+
+/**
+ * pci_enable_msi_range - configure device's MSI capability structure
+ * @dev: device to configure
+ * @minvec: minimal number of interrupts to configure
+ * @maxvec: maximum number of interrupts to configure
+ *
+ * This function tries to allocate a maximum possible number of interrupts in a
+ * range between @minvec and @maxvec. It returns a negative errno if an error
+ * occurs. If it succeeds, it returns the actual number of interrupts allocated
+ * and updates the @dev's irq member to the lowest new interrupt number;
+ * the other interrupt numbers allocated to this device are consecutive.
+ **/
+int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
+{
+	return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY);
+}
+EXPORT_SYMBOL(pci_enable_msi_range);
+
+static int __pci_enable_msix_range(struct pci_dev *dev,
+		struct msix_entry *entries, int minvec, int maxvec,
+		unsigned int flags)
+{
+	int nvec = maxvec;
+	int rc;
+
+	if (maxvec < minvec)
+		return -ERANGE;
+
+	for (;;) {
+		if (!(flags & PCI_IRQ_NOAFFINITY)) {
+			dev->irq_affinity = irq_create_affinity_mask(&nvec);
+			if (nvec < minvec)
 				return -ENOSPC;
-			nvec = rc;
 		}
-	} while (rc);
 
-	return nvec;
+		rc = pci_enable_msix(dev, entries, nvec);
+		if (rc == 0)
+			return nvec;
+
+		kfree(dev->irq_affinity);
+		dev->irq_affinity = NULL;
+
+		if (rc < 0)
+			return rc;
+		if (rc < minvec)
+			return -ENOSPC;
+		nvec = rc;
+	}
 }
-EXPORT_SYMBOL(pci_enable_msi_range);
 
 /**
  * pci_enable_msix_range - configure device's MSI-X capability structure
@@ -1099,26 +1155,10 @@ EXPORT_SYMBOL(pci_enable_msi_range);
  * with new allocated MSI-X interrupts.
  **/
 int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
-			       int minvec, int maxvec)
+		int minvec, int maxvec)
 {
-	int nvec = maxvec;
-	int rc;
-
-	if (maxvec < minvec)
-		return -ERANGE;
-
-	do {
-		rc = pci_enable_msix(dev, entries, nvec);
-		if (rc < 0) {
-			return rc;
-		} else if (rc > 0) {
-			if (rc < minvec)
-				return -ENOSPC;
-			nvec = rc;
-		}
-	} while (rc);
-
-	return nvec;
+	return __pci_enable_msix_range(dev, entries, minvec, maxvec,
+			PCI_IRQ_NOAFFINITY);
 }
 EXPORT_SYMBOL(pci_enable_msix_range);
 
@@ -1145,13 +1185,14 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
 	int vecs = -ENOSPC;
 
 	if (!(flags & PCI_IRQ_NOMSIX)) {
-		vecs = pci_enable_msix_range(dev, NULL, min_vecs, max_vecs);
+		vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
+				flags);
 		if (vecs > 0)
 			return vecs;
 	}
 
 	if (!(flags & PCI_IRQ_NOMSI)) {
-		vecs = pci_enable_msi_range(dev, min_vecs, max_vecs);
+		vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags);
 		if (vecs > 0)
 			return vecs;
 	}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 52ecd49..f140661 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -320,6 +320,7 @@ struct pci_dev {
 	 * directly, use the values stored here. They might be different!
 	 */
 	unsigned int	irq;
+	struct cpumask	*irq_affinity;
 	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
 
 	bool match_driver;		/* Skip attaching driver */
@@ -1240,6 +1241,7 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 #define PCI_IRQ_NOLEGACY	(1 << 0) /* don't use legacy interrupts */
 #define PCI_IRQ_NOMSI		(1 << 1) /* don't use MSI interrupts */
 #define PCI_IRQ_NOMSIX		(1 << 2) /* don't use MSI-X interrupts */
+#define PCI_IRQ_NOAFFINITY	(1 << 3) /* don't auto-assign affinity */
 
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
-- 
2.1.4


  parent reply	other threads:[~2016-07-12  9:20 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-12  9:20 pci: automatic interrupt affinity for MSI/MSI-X capable devices V2 Christoph Hellwig
2016-07-12  9:20 ` [PATCH 1/5] pci: add a pci_msix_desc_addr helper Christoph Hellwig
2016-07-12 12:59   ` Alexander Gordeev
2016-07-12  9:20 ` [PATCH 2/5] pci: switch msix_program_entries to use pci_msix_desc_addr Christoph Hellwig
2016-07-12 12:59   ` Alexander Gordeev
2016-07-12  9:20 ` [PATCH 3/5] pci: make the entries argument to pci_enable_msix optional Christoph Hellwig
2016-07-12  9:20 ` [PATCH 4/5] pci: Provide sensible irq vector alloc/free routines Christoph Hellwig
2016-07-12 12:58   ` Alexander Gordeev
2016-07-12  9:20 ` Christoph Hellwig [this message]
2016-07-12 12:57   ` [PATCH 5/5] pci: spread interrupt vectors in pci_alloc_irq_vectors Alexander Gordeev
2016-07-21 21:06 ` pci: automatic interrupt affinity for MSI/MSI-X capable devices V2 Bjorn Helgaas
2016-07-21 21:10   ` Christoph Hellwig
  -- strict thread matches above, loose matches on Subject: below --
2016-07-10 11:57 pci: automatic interrupt affinity for MSI/MSI-X capable devices Christoph Hellwig
2016-07-10 11:57 ` [PATCH 5/5] pci: spread interrupt vectors in pci_alloc_irq_vectors Christoph Hellwig
2016-07-11 20:51   ` Alexander Gordeev
2016-07-12  9:17     ` Christoph Hellwig
2016-07-12 12:15       ` Alexander Gordeev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1468315218-20490-6-git-send-email-hch@lst.de \
    --to=hch@lst.de \
    --cc=agordeev@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).