From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: Oliver O'Halloran <oohall@gmail.com>, linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
Date: Tue, 14 Jul 2020 15:37:06 +1000 [thread overview]
Message-ID: <ee5a00db-badd-12fe-1c46-eaba5afc8dea@ozlabs.ru> (raw)
In-Reply-To: <20200710052340.737567-4-oohall@gmail.com>
On 10/07/2020 15:23, Oliver O'Halloran wrote:
> There's an optimisation in the PE setup which skips performing DMA
> setup for a PE if we only have bridges in a PE. The assumption being
> that only "real" devices will DMA to system memory, which is probably
> fair. However, if we start off with only bridge devices in a PE then
> add a non-bridge device the new device won't be able to use DMA because
> we never configured it.
>
> Fix this (admittedly pretty weird) edge case by tracking whether we've done
> the DMA setup for the PE or not. If a non-bridge device is added to the PE
> (via rescan or hotplug, or whatever) we can set up DMA on demand.
So hotplug does not work on powernv then, right? I thought you tested it
a while ago, or is this patch the result of that attempt? If it is, then
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> This also means the only remaining user of the old "DMA Weight" code is
> the IODA1 DMA setup code that it was originally added for, which is good.
Is ditching IODA1 in the plan? :)
>
> Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
> Alexey, do we need to have the IOMMU API stuff set/clear this flag?
I'd say no as that API only cares if a device is in a PE and for those
the PE DMA setup optimization is skipped. Thanks,
> ---
> arch/powerpc/platforms/powernv/pci-ioda.c | 48 ++++++++++++++---------
> arch/powerpc/platforms/powernv/pci.h | 7 ++++
> 2 files changed, 36 insertions(+), 19 deletions(-)
>
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index bfb40607aa0e..bb9c1cc60c33 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -141,6 +141,7 @@ static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
>
> phb->ioda.pe_array[pe_no].phb = phb;
> phb->ioda.pe_array[pe_no].pe_number = pe_no;
> + phb->ioda.pe_array[pe_no].dma_setup_done = false;
>
> /*
> * Clear the PE frozen state as it might be put into frozen state
> @@ -1685,6 +1686,12 @@ static int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
> }
> #endif /* CONFIG_PCI_IOV */
>
> +static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
> + struct pnv_ioda_pe *pe);
> +
> +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> + struct pnv_ioda_pe *pe);
> +
> static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
> {
> struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
> @@ -1713,6 +1720,24 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
> pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number);
> }
>
> + /*
> + * We assume that bridges *probably* don't need to do any DMA so we can
> + * skip allocating a TCE table, etc unless we get a non-bridge device.
> + */
> + if (!pe->dma_setup_done && !pci_is_bridge(pdev)) {
> + switch (phb->type) {
> + case PNV_PHB_IODA1:
> + pnv_pci_ioda1_setup_dma_pe(phb, pe);
> + break;
> + case PNV_PHB_IODA2:
> + pnv_pci_ioda2_setup_dma_pe(phb, pe);
> + break;
> + default:
> + pr_warn("%s: No DMA for PHB#%x (type %d)\n",
> + __func__, phb->hose->global_number, phb->type);
> + }
> + }
> +
> if (pdn)
> pdn->pe_number = pe->pe_number;
> pe->device_count++;
> @@ -2222,6 +2247,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
> pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
> iommu_init_table(tbl, phb->hose->node, 0, 0);
>
> + pe->dma_setup_done = true;
> return;
> fail:
> /* XXX Failure: Try to fallback to 64-bit only ? */
> @@ -2536,9 +2562,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> {
> int64_t rc;
>
> - if (!pnv_pci_ioda_pe_dma_weight(pe))
> - return;
> -
> /* TVE #1 is selected by PCI address bit 59 */
> pe->tce_bypass_base = 1ull << 59;
>
> @@ -2563,6 +2586,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> iommu_register_group(&pe->table_group, phb->hose->global_number,
> pe->pe_number);
> #endif
> + pe->dma_setup_done = true;
> }
>
> int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
> @@ -3136,7 +3160,6 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus,
>
> static void pnv_pci_configure_bus(struct pci_bus *bus)
> {
> - struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
> struct pci_dev *bridge = bus->self;
> struct pnv_ioda_pe *pe;
> bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
> @@ -3160,17 +3183,6 @@ static void pnv_pci_configure_bus(struct pci_bus *bus)
> return;
>
> pnv_ioda_setup_pe_seg(pe);
> - switch (phb->type) {
> - case PNV_PHB_IODA1:
> - pnv_pci_ioda1_setup_dma_pe(phb, pe);
> - break;
> - case PNV_PHB_IODA2:
> - pnv_pci_ioda2_setup_dma_pe(phb, pe);
> - break;
> - default:
> - pr_warn("%s: No DMA for PHB#%x (type %d)\n",
> - __func__, phb->hose->global_number, phb->type);
> - }
> }
>
> static resource_size_t pnv_pci_default_alignment(void)
> @@ -3289,11 +3301,10 @@ static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
>
> static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
> {
> - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
> struct iommu_table *tbl = pe->table_group.tables[0];
> int64_t rc;
>
> - if (!weight)
> + if (!pe->dma_setup_done)
> return;
>
> rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0);
> @@ -3313,10 +3324,9 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
> static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
> {
> struct iommu_table *tbl = pe->table_group.tables[0];
> - unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
> int64_t rc;
>
> - if (!weight)
> + if (!pe->dma_setup_done)
> return;
>
> rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 0727dec9a0d1..6aa6aefb637d 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -87,6 +87,13 @@ struct pnv_ioda_pe {
> bool tce_bypass_enabled;
> uint64_t tce_bypass_base;
>
> + /*
> + * Used to track whether we've done DMA setup for this PE or not. We
> + * want to defer allocating TCE tables, etc until we've added a
> + * non-bridge device to the PE.
> + */
> + bool dma_setup_done;
> +
> /* MSIs. MVE index is identical for for 32 and 64 bit MSI
> * and -1 if not supported. (It's actually identical to the
> * PE number)
>
--
Alexey
next prev parent reply other threads:[~2020-07-14 5:38 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-10 5:23 PowerNV PCI & SR-IOV cleanups Oliver O'Halloran
2020-07-10 5:23 ` [PATCH 01/15] powernv/pci: Add pci_bus_to_pnvhb() helper Oliver O'Halloran
2020-07-13 8:28 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 02/15] powerpc/powernv/pci: Always tear down DMA windows on PE release Oliver O'Halloran
2020-07-13 8:30 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state Oliver O'Halloran
2020-07-14 5:37 ` Alexey Kardashevskiy [this message]
2020-07-14 5:58 ` Oliver O'Halloran
2020-07-14 7:21 ` Alexey Kardashevskiy
2020-07-15 0:23 ` Alexey Kardashevskiy
2020-07-15 1:38 ` Oliver O'Halloran
2020-07-15 3:33 ` Alexey Kardashevskiy
2020-07-15 7:05 ` Cédric Le Goater
2020-07-15 9:00 ` Oliver O'Halloran
2020-07-15 10:05 ` Cédric Le Goater
2020-07-10 5:23 ` [PATCH 04/15] powerpc/powernv/pci: Initialise M64 for IODA1 as a 1-1 window Oliver O'Halloran
2020-07-14 7:39 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 05/15] powerpc/powernv/sriov: Move SR-IOV into a seperate file Oliver O'Halloran
2020-07-14 9:16 ` Alexey Kardashevskiy
2020-07-22 5:01 ` Oliver O'Halloran
2020-07-22 9:53 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 06/15] powerpc/powernv/sriov: Explain how SR-IOV works on PowerNV Oliver O'Halloran
2020-07-15 0:40 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 07/15] powerpc/powernv/sriov: Rename truncate_iov Oliver O'Halloran
2020-07-15 0:46 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 08/15] powerpc/powernv/sriov: Simplify used window tracking Oliver O'Halloran
2020-07-15 1:34 ` Alexey Kardashevskiy
2020-07-15 1:41 ` Oliver O'Halloran
2020-07-10 5:23 ` [PATCH 09/15] powerpc/powernv/sriov: Factor out M64 BAR setup Oliver O'Halloran
2020-07-15 2:09 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe() Oliver O'Halloran
2020-07-15 2:29 ` Alexey Kardashevskiy
2020-07-15 2:53 ` Oliver O'Halloran
2020-07-15 3:15 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 11/15] powerpc/powernv/sriov: Drop iov->pe_num_map[] Oliver O'Halloran
2020-07-15 3:31 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 12/15] powerpc/powernv/sriov: De-indent setup and teardown Oliver O'Halloran
2020-07-15 4:00 ` Alexey Kardashevskiy
2020-07-15 4:21 ` Oliver O'Halloran
2020-07-15 4:41 ` Alexey Kardashevskiy
2020-07-15 4:46 ` Oliver O'Halloran
2020-07-15 4:58 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 13/15] powerpc/powernv/sriov: Move M64 BAR allocation into a helper Oliver O'Halloran
2020-07-15 4:02 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 14/15] powerpc/powernv/sriov: Refactor M64 BAR setup Oliver O'Halloran
2020-07-15 4:50 ` Alexey Kardashevskiy
2020-07-10 5:23 ` [PATCH 15/15] powerpc/powernv/sriov: Make single PE mode a per-BAR setting Oliver O'Halloran
2020-07-15 5:24 ` Alexey Kardashevskiy
2020-07-15 6:16 ` Oliver O'Halloran
2020-07-15 8:00 ` Alexey Kardashevskiy
2020-07-22 5:39 ` Oliver O'Halloran
2020-07-22 10:06 ` Alexey Kardashevskiy
2020-07-24 3:40 ` Oliver O'Halloran
2020-07-10 6:45 ` PowerNV PCI & SR-IOV cleanups Christoph Hellwig
2020-07-10 12:45 ` Oliver O'Halloran
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ee5a00db-badd-12fe-1c46-eaba5afc8dea@ozlabs.ru \
--to=aik@ozlabs.ru \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=oohall@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).