linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: tglx@linutronix.de, axboe@fb.com
Cc: agordeev@redhat.com, linux-block@vger.kernel.org,
	linux-pci@vger.kernel.org, linux-nvme@lists.infradead.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 08/13] pci: spread interrupt vectors in pci_alloc_irq_vectors
Date: Mon,  4 Jul 2016 17:39:29 +0900	[thread overview]
Message-ID: <1467621574-8277-9-git-send-email-hch@lst.de> (raw)
In-Reply-To: <1467621574-8277-1-git-send-email-hch@lst.de>

Set the affinity_mask in the PCI device before allocating vectors so that
the affinity can be propagated through the MSI descriptor structures to
the core IRQ code.  Add a new helper __pci_enable_msi_range which is
similar to __pci_enable_msix_range introduced in the last patch so that
we can allocate the affinity mask in a self-contained fashion and for the
right number of vectors.

A new PCI_IRQ_NOAFFINITY flag is added to pci_alloc_irq_vectors so that
this function can also be used by drivers that don't wish to use the
automatic affinity assignment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/PCI/MSI-HOWTO.txt |  3 +-
 drivers/pci/msi.c               | 72 ++++++++++++++++++++++++++++++++++++++---
 include/linux/pci.h             |  2 ++
 3 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 35d1326..dcd3f6d 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -95,7 +95,8 @@ argument set to this limit, and the PCI core will return -ENOSPC if it can't
 meet the minimum number of vectors.
 The flags argument should normally be set to 0, but can be used to
 pass the PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims
-to support MSI or MSI-X, but the support is broken.
+to support MSI or MSI-X, but the support is broken, or PCI_IRQ_NOAFFINITY
+if the driver does not wish to use the automatic affinity assignment feature.
 
 To get the Linux IRQ numbers passed to request_irq and free_irq
 and the vectors use the following function:
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 6b0834d..7f38e07 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -568,6 +568,7 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
 	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
 	entry->msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
 	entry->nvec_used		= nvec;
+	entry->affinity			= dev->irq_affinity;
 
 	if (control & PCI_MSI_FLAGS_64BIT)
 		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
@@ -679,10 +680,18 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
 static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 			      struct msix_entry *entries, int nvec)
 {
+	const struct cpumask *mask = NULL;
 	struct msi_desc *entry;
-	int i;
+	int cpu = -1, i;
 
 	for (i = 0; i < nvec; i++) {
+		if (dev->irq_affinity) {
+			cpu = cpumask_next(cpu, dev->irq_affinity);
+			if (cpu >= nr_cpu_ids)
+				cpu = cpumask_first(dev->irq_affinity);
+			mask = cpumask_of(cpu);
+		}
+
 		entry = alloc_msi_entry(&dev->dev);
 		if (!entry) {
 			if (!i)
@@ -699,6 +708,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 		entry->msi_attrib.default_irq	= dev->irq;
 		entry->mask_base		= base;
 		entry->nvec_used		= 1;
+		entry->affinity			= mask;
 
 		list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
 	}
@@ -1121,8 +1131,53 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
 }
 EXPORT_SYMBOL(pci_enable_msix_range);
 
+/*
+ * Enable between min_vecs and max_vecs MSI vectors, creating an affinity mask
+ * unless PCI_IRQ_NOAFFINITY is set.  Returns the vector count or -errno.
+ */
+static int __pci_enable_msi_range(struct pci_dev *dev, int min_vecs, int max_vecs,
+		unsigned int flags)
+{
+	int vecs, ret;
+
+	if (!pci_msi_supported(dev, min_vecs))
+		return -EINVAL;
+
+	vecs = pci_msi_vec_count(dev);
+	if (vecs < 0)
+		return vecs;
+	if (vecs < min_vecs)
+		return -EINVAL;
+	if (vecs > max_vecs)
+		vecs = max_vecs;
+
+retry:
+	if (vecs < min_vecs)
+		return -ENOSPC;
+
+	ret = -ENOSPC;
+	if (!(flags & PCI_IRQ_NOAFFINITY)) {
+		dev->irq_affinity = irq_create_affinity_mask(&vecs);
+		if (vecs < min_vecs)
+			goto out_fail;
+	}
+
+	ret = msi_capability_init(dev, vecs);
+	if (!ret)
+		return vecs;
+
+out_fail:
+	/* clear the pointer: a stale mask would be reused or freed again later */
+	kfree(dev->irq_affinity);
+	dev->irq_affinity = NULL;
+	if (ret < 0)
+		return ret;
+	vecs = ret;	/* retry with the actually supported number of vectors */
+	goto retry;
+}
+
 static int __pci_enable_msix_range(struct pci_dev *dev, unsigned int min_vecs,
-		unsigned int max_vecs)
+		unsigned int max_vecs, unsigned int flags)
 {
 	int vecs = max_vecs, ret, i;
 
@@ -1138,6 +1193,13 @@ retry:
 	for (i = 0; i < vecs; i++)
 		dev->msix_vectors[i].entry = i;
 
+	if (!(flags & PCI_IRQ_NOAFFINITY)) {
+		dev->irq_affinity = irq_create_affinity_mask(&vecs);
+		ret = -ENOSPC;
+		if (vecs < min_vecs)
+			goto out_fail;
+	}
+
 	ret = pci_enable_msix(dev, dev->msix_vectors, vecs);
 	if (ret)
 		goto out_fail;
@@ -1147,6 +1209,8 @@ retry:
 out_fail:
 	kfree(dev->msix_vectors);
 	dev->msix_vectors = NULL;
+	kfree(dev->irq_affinity);
+	dev->irq_affinity = NULL;
 
 	if (ret >= 0) {
 		/* retry with the actually supported number of vectors */
@@ -1180,13 +1244,13 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
 	int vecs;
 
 	if (!(flags & PCI_IRQ_NOMSIX)) {
-		vecs = __pci_enable_msix_range(dev, min_vecs, max_vecs);
+		vecs = __pci_enable_msix_range(dev, min_vecs, max_vecs, flags);
 		if (vecs > 0)
 			return vecs;
 	}
 
 	if (!(flags & PCI_IRQ_NOMSI)) {
-		vecs = pci_enable_msi_range(dev, min_vecs, max_vecs);
+		vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags);
 		if (vecs > 0)
 			return vecs;
 	}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 129871f..6a64c54 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -321,6 +321,7 @@ struct pci_dev {
 	 */
 	unsigned int	irq;
 	struct msix_entry *msix_vectors;
+	struct cpumask	*irq_affinity;
 	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
 
 	bool match_driver;		/* Skip attaching driver */
@@ -1240,6 +1241,7 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 
 #define PCI_IRQ_NOMSI		(1 << 0) /* don't try to use MSI interrupts */
 #define PCI_IRQ_NOMSIX		(1 << 1) /* don't try to use MSI-X interrupts */
+#define PCI_IRQ_NOAFFINITY	(1 << 2) /* don't auto-assign affinity */
 
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
-- 
2.1.4


  parent reply	other threads:[~2016-07-04  8:40 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-07-04  8:39 automatic interrupt affinity for MSI/MSI-X capable devices V3 Christoph Hellwig
2016-07-04  8:39 ` [PATCH 01/13] irq/msi: Remove unused MSI_FLAG_IDENTITY_MAP Christoph Hellwig
2016-07-04  8:39 ` [PATCH 02/13] irq: Introduce IRQD_AFFINITY_MANAGED flag Christoph Hellwig
2016-07-04  8:39 ` [PATCH 03/13] irq: Add affinity hint to irq allocation Christoph Hellwig
     [not found]   ` <1468875386-31662-1-git-send-email-vincent.stehle@laposte.net>
2016-07-19  3:56     ` [PATCH next] genirq: fix missing irq allocation affinity hint Christoph Hellwig
2016-07-19 12:03       ` Thomas Gleixner
2016-07-04  8:39 ` [PATCH 04/13] irq: Use affinity hint in irqdesc allocation Christoph Hellwig
2016-07-04  8:39 ` [PATCH 05/13] irq/msi: Make use of affinity aware allocations Christoph Hellwig
2016-07-04  8:39 ` [PATCH 06/13] irq: add a helper spread an affinity mask for MSI/MSI-X vectors Christoph Hellwig
2016-07-04  8:39 ` [PATCH 07/13] pci: Provide sensible irq vector alloc/free routines Christoph Hellwig
2016-07-06  8:05   ` Alexander Gordeev
2016-07-10  3:47     ` Christoph Hellwig
2016-07-11 10:43       ` Alexander Gordeev
2016-07-12  9:13         ` Christoph Hellwig
2016-07-12 12:46           ` Alexander Gordeev
2016-07-04  8:39 ` Christoph Hellwig [this message]
2016-07-07 11:05   ` [PATCH 08/13] pci: spread interrupt vectors in pci_alloc_irq_vectors Alexander Gordeev
2016-07-10  3:57     ` Christoph Hellwig
2016-07-12  6:49       ` Alexander Gordeev
2016-07-04  8:39 ` [PATCH 09/13] blk-mq: don't redistribute hardware queues on a CPU hotplug event Christoph Hellwig
2016-07-04  8:39 ` [PATCH 10/13] blk-mq: only allocate a single mq_map per tag_set Christoph Hellwig
2016-07-04  8:39 ` [PATCH 11/13] blk-mq: allow the driver to pass in an affinity mask Christoph Hellwig
2016-07-04  8:39 ` [PATCH 12/13] nvme: switch to use pci_alloc_irq_vectors Christoph Hellwig
2016-07-07 19:30   ` Alexander Gordeev
2016-07-10  3:59     ` Christoph Hellwig
2016-07-04  8:39 ` [PATCH 13/13] nvme: remove the post_scan callout Christoph Hellwig
2016-07-04 10:30 ` automatic interrupt affinity for MSI/MSI-X capable devices V3 Thomas Gleixner
  -- strict thread matches above, loose matches on Subject: below --
2016-06-14 19:58 automatic interrupt affinity for MSI/MSI-X capable devices V2 Christoph Hellwig
2016-06-14 19:59 ` [PATCH 08/13] pci: spread interrupt vectors in pci_alloc_irq_vectors Christoph Hellwig
2016-06-25 20:22   ` Alexander Gordeev

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1467621574-8277-9-git-send-email-hch@lst.de \
    --to=hch@lst.de \
    --cc=agordeev@redhat.com \
    --cc=axboe@fb.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).