From: "Derrick, Jonathan" <jonathan.derrick@intel.com>
To: "Busch, Keith" <keith.busch@intel.com>,
"linux-pci@vger.kernel.org" <linux-pci@vger.kernel.org>,
"bhelgaas@google.com" <bhelgaas@google.com>,
"Bauer, Scott" <scott.bauer@intel.com>
Subject: Re: [PATCH] vmd: Interrupt affinity pairing to child devices
Date: Tue, 6 Feb 2018 18:10:19 +0000
Message-ID: <1517940617.2496.8.camel@intel.com>
In-Reply-To: <20180201222305.25066-1-keith.busch@intel.com>
Hi Keith, Bjorn,
This looks good.
Acked-by: Jon Derrick <jonathan.derrick@intel.com>
On Thu, 2018-02-01 at 15:23 -0700, Keith Busch wrote:
> Performance for devices in VMD domains suffers in NUMA environments if
> we're not respecting the desired IRQ CPU affinity. This patch fixes
> that by creating managed affinity IRQ vectors for the VMD device;
> drivers registering their chained interrupts will then be assigned the
> h/w IRQ that most closely matches their desired IRQ affinity. A tie is
> awarded to the less-used vector.
>
> Note, this only works for drivers that allocate their vectors with
> PCI_IRQ_AFFINITY. All other drivers will be assigned the least-used
> vector without consideration for affinity.
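
For anyone wiring up a child driver to take advantage of this, a minimal
allocation sketch (the device and the vector count of 16 are invented for
illustration; this is the standard PCI API, not something this patch adds):

	/*
	 * Hypothetical child driver behind a VMD domain. Passing
	 * PCI_IRQ_AFFINITY makes the MSI core create managed-affinity
	 * vectors, which is what lets vmd_msi_init() take the
	 * affinity-pairing path below.
	 */
	int nvec = pci_alloc_irq_vectors(pdev, 1, 16,
					 PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
	if (nvec < 0)
		return nvec;
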
>
> Signed-off-by: Keith Busch <keith.busch@intel.com>
> ---
>  drivers/pci/host/vmd.c | 80 ++++++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 65 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c
> index 930a8fa08bd6..ac84676e79a4 100644
> --- a/drivers/pci/host/vmd.c
> +++ b/drivers/pci/host/vmd.c
> @@ -166,10 +166,6 @@ static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
>  	return 0;
>  }
>  
> -/*
> - * XXX: We can be even smarter selecting the best IRQ once we solve the
> - * affinity problem.
> - */
>  static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
>  {
>  	int i, best = 1;
> @@ -188,24 +184,61 @@ static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *d
>  	return &vmd->irqs[best];
>  }
>
> +static struct vmd_irq_list *vmd_next_affinity_irq(struct vmd_dev *vmd, const struct cpumask *dest)
> +{
> +	struct vmd_irq_list *irq = NULL;
> +	const struct cpumask *vmd_mask;
> +	unsigned long flags, match;
> +	int i, best = 0;
> +
> +	if (!dest || vmd->msix_count < 2)
> +		return NULL;
> +
> +	raw_spin_lock_irqsave(&list_lock, flags);
> +	for (i = 1; i < vmd->msix_count; i++) {
> +		struct cpumask tmp;
> +
> +		vmd_mask = pci_irq_get_affinity(vmd->dev, i);
> +		cpumask_and(&tmp, vmd_mask, dest);
> +		match = cpumask_weight(&tmp);
> +		if (match >= best) {
> +			if (match == best && irq &&
> +			    (vmd->irqs[i].count >= irq->count))
> +				continue;
> +			irq = &vmd->irqs[i];
> +			best = match;
> +		}
> +	}
> +	if (irq)
> +		irq->count++;
> +	raw_spin_unlock_irqrestore(&list_lock, flags);
> +
> +	return irq;
> +}
> +
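
The selection policy above amounts to "largest CPU overlap with the
requested mask wins, ties go to the less loaded vector." A standalone toy
model of that policy over plain bitmasks, in case it helps review (userspace
C, all names invented; the kernel code of course uses cpumasks and locking):

	#include <stdio.h>

	struct toy_irq { unsigned long mask; int count; };

	/* Mirror vmd_next_affinity_irq(): skip vector 0, pick the vector
	 * whose mask shares the most CPUs with 'dest', and on a tie keep
	 * the one with the lower use count. */
	static int pick_vector(struct toy_irq *irqs, int n, unsigned long dest)
	{
		int i, best = -1, best_match = 0;

		for (i = 1; i < n; i++) {
			int match = __builtin_popcountl(irqs[i].mask & dest);

			if (match > best_match ||
			    (match == best_match &&
			     (best < 0 || irqs[i].count < irqs[best].count))) {
				best = i;
				best_match = match;
			}
		}
		if (best >= 0)
			irqs[best].count++;
		return best;
	}

	int main(void)
	{
		/* vector 1 covers CPUs 0-1, vector 2 covers CPUs 2-3 */
		struct toy_irq irqs[] = { { 0, 0 }, { 0x3, 0 }, { 0xc, 0 } };

		printf("%d\n", pick_vector(irqs, 3, 0xc));	/* prints 2 */
		return 0;
	}
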
>  static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
>  			unsigned int virq, irq_hw_number_t hwirq,
>  			msi_alloc_info_t *arg)
>  {
> -	struct msi_desc *desc = arg->desc;
> -	struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
> +	struct msi_desc *msidesc = arg->desc;
> +	struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(msidesc)->bus);
>  	struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
> -	unsigned int index, vector;
> +	struct irq_desc *desc = irq_to_desc(virq);
> +	unsigned int vector;
>  
>  	if (!vmdirq)
>  		return -ENOMEM;
>  
>  	INIT_LIST_HEAD(&vmdirq->node);
> -	vmdirq->irq = vmd_next_irq(vmd, desc);
> -	vmdirq->virq = virq;
> -	index = index_from_irqs(vmd, vmdirq->irq);
> -	vector = pci_irq_vector(vmd->dev, index);
>  
> +	if (desc && irqd_affinity_is_managed(&desc->irq_data))
> +		vmdirq->irq = vmd_next_affinity_irq(vmd,
> +						    desc->irq_common_data.affinity);
> +	if (vmdirq->irq == NULL)
> +		vmdirq->irq = vmd_next_irq(vmd, msidesc);
> +
> +	vmdirq->virq = virq;
> +	vector = pci_irq_vector(vmd->dev, index_from_irqs(vmd, vmdirq->irq));
>  	irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
>  			    handle_untracked_irq, vmd, NULL);
>  	return 0;
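
As a review aid, the resulting pairing is easy to eyeball from the child
side with something like the following (a hypothetical debug snippet, not
part of the patch):

	/* Dump the affinity mask each child vector was allocated with. */
	for (i = 0; i < nvec; i++) {
		const struct cpumask *mask = pci_irq_get_affinity(pdev, i);

		if (!mask)
			continue;
		dev_info(&pdev->dev, "vector %d -> CPUs %*pbl\n",
			 i, cpumask_pr_args(mask));
	}
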
> @@ -233,9 +266,11 @@ static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
>  	struct pci_dev *pdev = to_pci_dev(dev);
>  	struct vmd_dev *vmd = vmd_from_bus(pdev->bus);
>  
> -	if (nvec > vmd->msix_count)
> +	if (nvec > vmd->msix_count) {
> +		if (vmd->msix_count > 1)
> +			return vmd->msix_count - 1;
>  		return vmd->msix_count;
> -
> +	}
>  	memset(arg, 0, sizeof(*arg));
>  	return 0;
>  }
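
Worth noting for reviewers: as I read the MSI core, a positive return from
.msi_prepare propagates up and the MSI-X range allocation retries with that
count, so a child asking for more vectors than VMD can pair simply falls
back. Roughly (counts invented for illustration):

	/*
	 * Hypothetical child behind a 33-vector VMD asking for 64 vectors:
	 * vmd_msi_prepare() now answers "retry with 32" (vector 0 stays
	 * reserved), and this call succeeds with nvec == 32.
	 */
	nvec = pci_alloc_irq_vectors(pdev, 1, 64,
				     PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
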
> @@ -663,6 +698,14 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
>  	struct vmd_dev *vmd;
>  	int i, err;
>  
> +	/*
> +	 * The first vector is reserved for special use, so start affinity at
> +	 * the second vector.
> +	 */
> +	struct irq_affinity affd = {
> +		.pre_vectors = 1,
> +	};
> +
>  	if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
>  		return -ENOMEM;
>  
> @@ -688,8 +731,15 @@ static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
>  	if (vmd->msix_count < 0)
>  		return -ENODEV;
>  
> -	vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count,
> -						PCI_IRQ_MSIX);
> +	/*
> +	 * Reserve remaining vectors that IRQ affinity won't be able to assign.
> +	 */
> +	if ((vmd->msix_count - 1) > cpumask_weight(cpu_present_mask))
> +		affd.post_vectors = vmd->msix_count -
> +				    cpumask_weight(cpu_present_mask) - 1;
> +
> +	vmd->msix_count = pci_alloc_irq_vectors_affinity(dev, 1, vmd->msix_count,
> +							 PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &affd);
>  	if (vmd->msix_count < 0)
>  		return vmd->msix_count;
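
To make the reservation arithmetic concrete (numbers invented): on a box
with 16 present CPUs and msix_count == 33, (33 - 1) > 16 holds, so

	post_vectors = 33 - 16 - 1 = 16

which splits the table as vector 0 (pre_vector, VMD's own use), vectors
1..16 (managed, spread one per present CPU), and vectors 17..32
(post_vectors, excluded from the spreading). When msix_count - 1 is at most
the number of present CPUs, post_vectors stays 0 and every remaining vector
is spread.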