All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bjorn Helgaas <bhelgaas@google.com>
To: Wei Yang <weiyang@linux.vnet.ibm.com>,
	benh@au1.ibm.com, gwshan@linux.vnet.ibm.com
Cc: linux-pci@vger.kernel.org, linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH v12 17/21] powerpc/powernv: Shift VF resource with an offset
Date: Tue, 24 Feb 2015 03:03:04 -0600	[thread overview]
Message-ID: <20150224090304.GL6220@google.com> (raw)
In-Reply-To: <20150224083457.32124.55534.stgit@bhelgaas-glaptop2.roam.corp.google.com>

On Tue, Feb 24, 2015 at 02:34:57AM -0600, Bjorn Helgaas wrote:
> From: Wei Yang <weiyang@linux.vnet.ibm.com>
> 
> On PowerNV platform, resource position in M64 implies the PE# the resource
> belongs to.  In some cases, adjustment of a resource is necessary to locate
> it to a correct position in M64.
> 
> Add pnv_pci_vf_resource_shift() to shift the 'real' PF IOV BAR address
> according to an offset.

I think I squashed "powerpc/powernv: Allocate VF PE" into this patch, but
I didn't merge that patch's changelog into this one.  The two patches don't
seem closely related to each other, but I think there really was some
dependency between them.

> [bhelgaas: rework loops, rework overlap check, index resource[]
> conventionally, remove pci_regs.h include, squashed with next patch]
> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> ---
>  arch/powerpc/include/asm/pci-bridge.h     |    4 
>  arch/powerpc/kernel/pci_dn.c              |   11 +
>  arch/powerpc/platforms/powernv/pci-ioda.c |  520 ++++++++++++++++++++++++++++-
>  arch/powerpc/platforms/powernv/pci.c      |   18 +
>  arch/powerpc/platforms/powernv/pci.h      |    7 
>  5 files changed, 543 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
> index de11de7d4547..011340df8583 100644
> --- a/arch/powerpc/include/asm/pci-bridge.h
> +++ b/arch/powerpc/include/asm/pci-bridge.h
> @@ -177,6 +177,10 @@ struct pci_dn {
>  	int	pe_number;
>  #ifdef CONFIG_PCI_IOV
>  	u16     max_vfs;		/* number of VFs IOV BAR expended */
> +	u16     vf_pes;			/* VF PE# under this PF */
> +	int     offset;			/* PE# for the first VF PE */
> +#define IODA_INVALID_M64        (-1)
> +	int     m64_wins[PCI_SRIOV_NUM_BARS];
>  #endif /* CONFIG_PCI_IOV */
>  #endif
>  	struct list_head child_list;
> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
> index f3a1a81d112f..5faf7ca45434 100644
> --- a/arch/powerpc/kernel/pci_dn.c
> +++ b/arch/powerpc/kernel/pci_dn.c
> @@ -217,6 +217,17 @@ void remove_dev_pci_info(struct pci_dev *pdev)
>  	struct pci_dn *pdn, *tmp;
>  	int i;
>  
> +	/*
> +	 * VF and VF PE are created/released dynamically, so we need to
> +	 * bind/unbind them.  Otherwise the VF and VF PE would be mismatched
> +	 * when re-enabling SR-IOV.
> +	 */
> +	if (pdev->is_virtfn) {
> +		pdn = pci_get_pdn(pdev);
> +		pdn->pe_number = IODA_INVALID_PE;
> +		return;
> +	}
> +
>  	/* Only support IOV PF for now */
>  	if (!pdev->is_physfn)
>  		return;
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 6a86690bb8de..a3c2fbe35fc8 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -44,6 +44,9 @@
>  #include "powernv.h"
>  #include "pci.h"
>  
> +/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
> +#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)
> +
>  static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
>  			    const char *fmt, ...)
>  {
> @@ -56,11 +59,18 @@ static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
>  	vaf.fmt = fmt;
>  	vaf.va = &args;
>  
> -	if (pe->pdev)
> +	if (pe->flags & PNV_IODA_PE_DEV)
>  		strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix));
> -	else
> +	else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
>  		sprintf(pfix, "%04x:%02x     ",
>  			pci_domain_nr(pe->pbus), pe->pbus->number);
> +#ifdef CONFIG_PCI_IOV
> +	else if (pe->flags & PNV_IODA_PE_VF)
> +		sprintf(pfix, "%04x:%02x:%2x.%d",
> +			pci_domain_nr(pe->parent_dev->bus),
> +			(pe->rid & 0xff00) >> 8,
> +			PCI_SLOT(pe->rid), PCI_FUNC(pe->rid));
> +#endif /* CONFIG_PCI_IOV*/
>  
>  	printk("%spci %s: [PE# %.3d] %pV",
>  	       level, pfix, pe->pe_number, &vaf);
> @@ -591,7 +601,7 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
>  			      bool is_add)
>  {
>  	struct pnv_ioda_pe *slave;
> -	struct pci_dev *pdev;
> +	struct pci_dev *pdev = NULL;
>  	int ret;
>  
>  	/*
> @@ -630,8 +640,12 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
>  
>  	if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS))
>  		pdev = pe->pbus->self;
> -	else
> +	else if (pe->flags & PNV_IODA_PE_DEV)
>  		pdev = pe->pdev->bus->self;
> +#ifdef CONFIG_PCI_IOV
> +	else if (pe->flags & PNV_IODA_PE_VF)
> +		pdev = pe->parent_dev->bus->self;
> +#endif /* CONFIG_PCI_IOV */
>  	while (pdev) {
>  		struct pci_dn *pdn = pci_get_pdn(pdev);
>  		struct pnv_ioda_pe *parent;
> @@ -649,6 +663,87 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
>  	return 0;
>  }
>  
> +#ifdef CONFIG_PCI_IOV
> +static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
> +{
> +	struct pci_dev *parent;
> +	uint8_t bcomp, dcomp, fcomp;
> +	int64_t rc;
> +	long rid_end, rid;
> +
> +	/* Currently, we just deconfigure VF PE. Bus PE will always there.*/
> +	if (pe->pbus) {
> +		int count;
> +
> +		dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER;
> +		fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER;
> +		parent = pe->pbus->self;
> +		if (pe->flags & PNV_IODA_PE_BUS_ALL)
> +			count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1;
> +		else
> +			count = 1;
> +
> +		switch(count) {
> +		case  1: bcomp = OpalPciBusAll;         break;
> +		case  2: bcomp = OpalPciBus7Bits;       break;
> +		case  4: bcomp = OpalPciBus6Bits;       break;
> +		case  8: bcomp = OpalPciBus5Bits;       break;
> +		case 16: bcomp = OpalPciBus4Bits;       break;
> +		case 32: bcomp = OpalPciBus3Bits;       break;
> +		default:
> +			dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
> +			        count);
> +			/* Do an exact match only */
> +			bcomp = OpalPciBusAll;
> +		}
> +		rid_end = pe->rid + (count << 8);
> +	} else {
> +		if (pe->flags & PNV_IODA_PE_VF)
> +			parent = pe->parent_dev;
> +		else
> +			parent = pe->pdev->bus->self;
> +		bcomp = OpalPciBusAll;
> +		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
> +		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
> +		rid_end = pe->rid + 1;
> +	}
> +
> +	/* Clear the reverse map */
> +	for (rid = pe->rid; rid < rid_end; rid++)
> +		phb->ioda.pe_rmap[rid] = 0;
> +
> +	/* Release from all parents PELT-V */
> +	while (parent) {
> +		struct pci_dn *pdn = pci_get_pdn(parent);
> +		if (pdn && pdn->pe_number != IODA_INVALID_PE) {
> +			rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
> +						pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
> +			/* XXX What to do in case of error ? */
> +		}
> +		parent = parent->bus->self;
> +	}
> +
> +	opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number,
> +				  OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
> +
> +	/* Disassociate PE in PELT */
> +	rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
> +				pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
> +	if (rc)
> +		pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc);
> +	rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
> +			     bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
> +	if (rc)
> +		pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
> +
> +	pe->pbus = NULL;
> +	pe->pdev = NULL;
> +	pe->parent_dev = NULL;
> +
> +	return 0;
> +}
> +#endif /* CONFIG_PCI_IOV */
> +
>  static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
>  {
>  	struct pci_dev *parent;
> @@ -675,15 +770,19 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
>  		case 16: bcomp = OpalPciBus4Bits;	break;
>  		case 32: bcomp = OpalPciBus3Bits;	break;
>  		default:
> -			pr_err("%s: Number of subordinate busses %d"
> -			       " unsupported\n",
> -			       pci_name(pe->pbus->self), count);
> +			dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n",
> +			        count);
>  			/* Do an exact match only */
>  			bcomp = OpalPciBusAll;
>  		}
>  		rid_end = pe->rid + (count << 8);
>  	} else {
> -		parent = pe->pdev->bus->self;
> +#ifdef CONFIG_PCI_IOV
> +		if (pe->flags & PNV_IODA_PE_VF)
> +			parent = pe->parent_dev;
> +		else
> +#endif /* CONFIG_PCI_IOV */
> +			parent = pe->pdev->bus->self;
>  		bcomp = OpalPciBusAll;
>  		dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER;
>  		fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER;
> @@ -774,6 +873,74 @@ static unsigned int pnv_ioda_dma_weight(struct pci_dev *dev)
>  	return 10;
>  }
>  
> +#ifdef CONFIG_PCI_IOV
> +static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
> +{
> +	struct pci_dn *pdn = pci_get_pdn(dev);
> +	int i;
> +	struct resource *res, res2;
> +	resource_size_t size;
> +	u16 vf_num;
> +
> +	if (!dev->is_physfn)
> +		return -EINVAL;
> +
> +	/*
> +	 * "offset" is in VFs.  The M64 windows are sized so that when they
> +	 * are segmented, each segment is the same size as the IOV BAR.
> +	 * Each segment is in a separate PE, and the high order bits of the
> +	 * address are the PE number.  Therefore, each VF's BAR is in a
> +	 * separate PE, and changing the IOV BAR start address changes the
> +	 * range of PEs the VFs are in.
> +	 */
> +	vf_num = pdn->vf_pes;
> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> +		res = &dev->resource[i + PCI_IOV_RESOURCES];
> +		if (!res->flags || !res->parent)
> +			continue;
> +
> +		if (!pnv_pci_is_mem_pref_64(res->flags))
> +			continue;
> +
> +		/*
> +		 * The actual IOV BAR range is determined by the start address
> +		 * and the actual size for vf_num VFs BAR.  This check is to
> +		 * make sure that after shifting, the range will not overlap
> +		 * with another device.
> +		 */
> +		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
> +		res2.flags = res->flags;
> +		res2.start = res->start + (size * offset);
> +		res2.end = res2.start + (size * vf_num) - 1;
> +
> +		if (res2.end > res->end) {
> +			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
> +				i, &res2, res, vf_num, offset);
> +			return -EBUSY;
> +		}
> +	}
> +
> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> +		res = &dev->resource[i + PCI_IOV_RESOURCES];
> +		if (!res->flags || !res->parent)
> +			continue;
> +
> +		if (!pnv_pci_is_mem_pref_64(res->flags))
> +			continue;
> +
> +		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
> +		res2 = *res;
> +		res->start += size * offset;
> +
> +		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n",
> +			 i, &res2, res, vf_num, offset);
> +		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
> +	}
> +	pdn->max_vfs -= offset;
> +	return 0;
> +}
> +#endif /* CONFIG_PCI_IOV */
> +
>  #if 0
>  static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
>  {
> @@ -979,8 +1146,312 @@ static void pnv_pci_ioda_setup_PEs(void)
>  }
>  
>  #ifdef CONFIG_PCI_IOV
> +static int pnv_pci_vf_release_m64(struct pci_dev *pdev)
> +{
> +	struct pci_bus        *bus;
> +	struct pci_controller *hose;
> +	struct pnv_phb        *phb;
> +	struct pci_dn         *pdn;
> +	int                    i;
> +
> +	bus = pdev->bus;
> +	hose = pci_bus_to_host(bus);
> +	phb = hose->private_data;
> +	pdn = pci_get_pdn(pdev);
> +
> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> +		if (pdn->m64_wins[i] == IODA_INVALID_M64)
> +			continue;
> +		opal_pci_phb_mmio_enable(phb->opal_id,
> +				OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 0);
> +		clear_bit(pdn->m64_wins[i], &phb->ioda.m64_bar_alloc);
> +		pdn->m64_wins[i] = IODA_INVALID_M64;
> +	}
> +
> +	return 0;
> +}
> +
> +static int pnv_pci_vf_assign_m64(struct pci_dev *pdev)
> +{
> +	struct pci_bus        *bus;
> +	struct pci_controller *hose;
> +	struct pnv_phb        *phb;
> +	struct pci_dn         *pdn;
> +	unsigned int           win;
> +	struct resource       *res;
> +	int                    i;
> +	int64_t                rc;
> +
> +	bus = pdev->bus;
> +	hose = pci_bus_to_host(bus);
> +	phb = hose->private_data;
> +	pdn = pci_get_pdn(pdev);
> +
> +	/* Initialize the m64_wins to IODA_INVALID_M64 */
> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
> +		pdn->m64_wins[i] = IODA_INVALID_M64;
> +
> +	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
> +		res = &pdev->resource[i + PCI_IOV_RESOURCES];
> +		if (!res->flags || !res->parent)
> +			continue;
> +
> +		if (!pnv_pci_is_mem_pref_64(res->flags))
> +			continue;
> +
> +		do {
> +			win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
> +					phb->ioda.m64_bar_idx + 1, 0);
> +
> +			if (win >= phb->ioda.m64_bar_idx + 1)
> +				goto m64_failed;
> +		} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
> +
> +		pdn->m64_wins[i] = win;
> +
> +		/* Map the M64 here */
> +		rc = opal_pci_set_phb_mem_window(phb->opal_id,
> +						 OPAL_M64_WINDOW_TYPE,
> +						 pdn->m64_wins[i],
> +						 res->start,
> +						 0, /* unused */
> +						 resource_size(res));
> +		if (rc != OPAL_SUCCESS) {
> +			dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
> +				win, rc);
> +			goto m64_failed;
> +		}
> +
> +		rc = opal_pci_phb_mmio_enable(phb->opal_id,
> +				OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 1);
> +		if (rc != OPAL_SUCCESS) {
> +			dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
> +				win, rc);
> +			goto m64_failed;
> +		}
> +	}
> +	return 0;
> +
> +m64_failed:
> +	pnv_pci_vf_release_m64(pdev);
> +	return -EBUSY;
> +}
> +
> +static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
> +{
> +	struct pci_bus        *bus;
> +	struct pci_controller *hose;
> +	struct pnv_phb        *phb;
> +	struct iommu_table    *tbl;
> +	unsigned long         addr;
> +	int64_t               rc;
> +
> +	bus = dev->bus;
> +	hose = pci_bus_to_host(bus);
> +	phb = hose->private_data;
> +	tbl = pe->tce32_table;
> +	addr = tbl->it_base;
> +
> +	opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
> +				   pe->pe_number << 1, 1, __pa(addr),
> +				   0, 0x1000);
> +
> +	rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id,
> +				        pe->pe_number,
> +				        (pe->pe_number << 1) + 1,
> +				        pe->tce_bypass_base,
> +				        0);
> +	if (rc)
> +		pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
> +
> +	iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
> +	free_pages(addr, get_order(TCE32_TABLE_SIZE));
> +	pe->tce32_table = NULL;
> +}
> +
> +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
> +{
> +	struct pci_bus        *bus;
> +	struct pci_controller *hose;
> +	struct pnv_phb        *phb;
> +	struct pnv_ioda_pe    *pe, *pe_n;
> +	struct pci_dn         *pdn;
> +
> +	bus = pdev->bus;
> +	hose = pci_bus_to_host(bus);
> +	phb = hose->private_data;
> +
> +	if (!pdev->is_physfn)
> +		return;
> +
> +	pdn = pci_get_pdn(pdev);
> +	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
> +		if (pe->parent_dev != pdev)
> +			continue;
> +
> +		pnv_pci_ioda2_release_dma_pe(pdev, pe);
> +
> +		/* Remove from list */
> +		mutex_lock(&phb->ioda.pe_list_mutex);
> +		list_del(&pe->list);
> +		mutex_unlock(&phb->ioda.pe_list_mutex);
> +
> +		pnv_ioda_deconfigure_pe(phb, pe);
> +
> +		pnv_ioda_free_pe(phb, pe->pe_number);
> +	}
> +}
> +
> +void pnv_pci_sriov_disable(struct pci_dev *pdev)
> +{
> +	struct pci_bus        *bus;
> +	struct pci_controller *hose;
> +	struct pnv_phb        *phb;
> +	struct pci_dn         *pdn;
> +	struct pci_sriov      *iov;
> +	u16 vf_num;
> +
> +	bus = pdev->bus;
> +	hose = pci_bus_to_host(bus);
> +	phb = hose->private_data;
> +	pdn = pci_get_pdn(pdev);
> +	iov = pdev->sriov;
> +	vf_num = pdn->vf_pes;
> +
> +	/* Release VF PEs */
> +	pnv_ioda_release_vf_PE(pdev);
> +
> +	if (phb->type == PNV_PHB_IODA2) {
> +		pnv_pci_vf_resource_shift(pdev, -pdn->offset);
> +
> +		/* Release M64 windows */
> +		pnv_pci_vf_release_m64(pdev);
> +
> +		/* Release PE numbers */
> +		bitmap_clear(phb->ioda.pe_alloc, pdn->offset, vf_num);
> +		pdn->offset = 0;
> +	}
> +}
> +
> +static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
> +				       struct pnv_ioda_pe *pe);
> +static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 vf_num)
> +{
> +	struct pci_bus        *bus;
> +	struct pci_controller *hose;
> +	struct pnv_phb        *phb;
> +	struct pnv_ioda_pe    *pe;
> +	int                    pe_num;
> +	u16                    vf_index;
> +	struct pci_dn         *pdn;
> +
> +	bus = pdev->bus;
> +	hose = pci_bus_to_host(bus);
> +	phb = hose->private_data;
> +	pdn = pci_get_pdn(pdev);
> +
> +	if (!pdev->is_physfn)
> +		return;
> +
> +	/* Reserve PE for each VF */
> +	for (vf_index = 0; vf_index < vf_num; vf_index++) {
> +		pe_num = pdn->offset + vf_index;
> +
> +		pe = &phb->ioda.pe_array[pe_num];
> +		pe->pe_number = pe_num;
> +		pe->phb = phb;
> +		pe->flags = PNV_IODA_PE_VF;
> +		pe->pbus = NULL;
> +		pe->parent_dev = pdev;
> +		pe->tce32_seg = -1;
> +		pe->mve_number = -1;
> +		pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) |
> +			   pci_iov_virtfn_devfn(pdev, vf_index);
> +
> +		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n",
> +			hose->global_number, pdev->bus->number,
> +			PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)),
> +			PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num);
> +
> +		if (pnv_ioda_configure_pe(phb, pe)) {
> +			/* XXX What do we do here ? */
> +			if (pe_num)
> +				pnv_ioda_free_pe(phb, pe_num);
> +			pe->pdev = NULL;
> +			continue;
> +		}
> +
> +		pe->tce32_table = kzalloc_node(sizeof(struct iommu_table),
> +				GFP_KERNEL, hose->node);
> +		pe->tce32_table->data = pe;
> +
> +		/* Put PE to the list */
> +		mutex_lock(&phb->ioda.pe_list_mutex);
> +		list_add_tail(&pe->list, &phb->ioda.pe_list);
> +		mutex_unlock(&phb->ioda.pe_list_mutex);
> +
> +		pnv_pci_ioda2_setup_dma_pe(phb, pe);
> +	}
> +}
> +
> +int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 vf_num)
> +{
> +	struct pci_bus        *bus;
> +	struct pci_controller *hose;
> +	struct pnv_phb        *phb;
> +	struct pci_dn         *pdn;
> +	int                    ret;
> +
> +	bus = pdev->bus;
> +	hose = pci_bus_to_host(bus);
> +	phb = hose->private_data;
> +	pdn = pci_get_pdn(pdev);
> +
> +	if (phb->type == PNV_PHB_IODA2) {
> +		/* Calculate available PE for required VFs */
> +		mutex_lock(&phb->ioda.pe_alloc_mutex);
> +		pdn->offset = bitmap_find_next_zero_area(
> +			phb->ioda.pe_alloc, phb->ioda.total_pe,
> +			0, vf_num, 0);
> +		if (pdn->offset >= phb->ioda.total_pe) {
> +			mutex_unlock(&phb->ioda.pe_alloc_mutex);
> +			dev_info(&pdev->dev, "Failed to enable VF%d\n", vf_num);
> +			pdn->offset = 0;
> +			return -EBUSY;
> +		}
> +		bitmap_set(phb->ioda.pe_alloc, pdn->offset, vf_num);
> +		pdn->vf_pes = vf_num;
> +		mutex_unlock(&phb->ioda.pe_alloc_mutex);
> +
> +		/* Assign M64 window accordingly */
> +		ret = pnv_pci_vf_assign_m64(pdev);
> +		if (ret) {
> +			dev_info(&pdev->dev, "Not enough M64 window resources\n");
> +			goto m64_failed;
> +		}
> +
> +		/* Do some magic shift */
> +		ret = pnv_pci_vf_resource_shift(pdev, pdn->offset);
> +		if (ret)
> +			goto m64_failed;
> +	}
> +
> +	/* Setup VF PEs */
> +	pnv_ioda_setup_vf_PE(pdev, vf_num);
> +
> +	return 0;
> +
> +m64_failed:
> +	bitmap_clear(phb->ioda.pe_alloc, pdn->offset, vf_num);
> +	pdn->offset = 0;
> +
> +	return ret;
> +}
> +
>  int pcibios_sriov_disable(struct pci_dev *pdev)
>  {
> +	pnv_pci_sriov_disable(pdev);
> +
>  	/* Release firmware data */
>  	remove_dev_pci_info(pdev);
>  	return 0;
> @@ -990,6 +1461,8 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 vf_num)
>  {
>  	/* Allocate firmware data */
>  	add_dev_pci_info(pdev);
> +
> +	pnv_pci_sriov_enable(pdev, vf_num);
>  	return 0;
>  }
>  #endif /* CONFIG_PCI_IOV */
> @@ -1186,9 +1659,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
>  	int64_t rc;
>  	void *addr;
>  
> -	/* 256M DMA window, 4K TCE pages, 8 bytes TCE */
> -#define TCE32_TABLE_SIZE	((0x10000000 / 0x1000) * 8)
> -
>  	/* XXX FIXME: Handle 64-bit only DMA devices */
>  	/* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. */
>  	/* XXX FIXME: Allocate multi-level tables on PHB3 */
> @@ -1251,12 +1721,19 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
>  				 TCE_PCI_SWINV_PAIR);
>  	}
>  	iommu_init_table(tbl, phb->hose->node);
> -	iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
>  
> -	if (pe->pdev)
> +	if (pe->flags & PNV_IODA_PE_DEV) {
> +		iommu_register_group(tbl, phb->hose->global_number,
> +				     pe->pe_number);
>  		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
> -	else
> +	} else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
> +		iommu_register_group(tbl, phb->hose->global_number,
> +				     pe->pe_number);
>  		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
> +	} else if (pe->flags & PNV_IODA_PE_VF) {
> +		iommu_register_group(tbl, phb->hose->global_number,
> +				     pe->pe_number);
> +	}
>  
>  	return;
>   fail:
> @@ -1383,12 +1860,19 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
>  		tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE);
>  	}
>  	iommu_init_table(tbl, phb->hose->node);
> -	iommu_register_group(tbl, phb->hose->global_number, pe->pe_number);
>  
> -	if (pe->pdev)
> +	if (pe->flags & PNV_IODA_PE_DEV) {
> +		iommu_register_group(tbl, phb->hose->global_number,
> +				     pe->pe_number);
>  		set_iommu_table_base_and_group(&pe->pdev->dev, tbl);
> -	else
> +	} else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) {
> +		iommu_register_group(tbl, phb->hose->global_number,
> +				     pe->pe_number);
>  		pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
> +	} else if (pe->flags & PNV_IODA_PE_VF) {
> +		iommu_register_group(tbl, phb->hose->global_number,
> +				     pe->pe_number);
> +	}
>  
>  	/* Also create a bypass window */
>  	if (!pnv_iommu_bypass_disabled)
> @@ -2083,6 +2567,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
>  	phb->hub_id = hub_id;
>  	phb->opal_id = phb_id;
>  	phb->type = ioda_type;
> +	mutex_init(&phb->ioda.pe_alloc_mutex);
>  
>  	/* Detect specific models for error handling */
>  	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
> @@ -2142,6 +2627,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
>  
>  	INIT_LIST_HEAD(&phb->ioda.pe_dma_list);
>  	INIT_LIST_HEAD(&phb->ioda.pe_list);
> +	mutex_init(&phb->ioda.pe_list_mutex);
>  
>  	/* Calculate how many 32-bit TCE segments we have */
>  	phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
> index 6c20d6e70383..a88f915fc603 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -714,6 +714,24 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
>  {
>  	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
>  	struct pnv_phb *phb = hose->private_data;
> +#ifdef CONFIG_PCI_IOV
> +	struct pnv_ioda_pe *pe;
> +	struct pci_dn *pdn;
> +
> +	/* Fix the VF pdn PE number */
> +	if (pdev->is_virtfn) {
> +		pdn = pci_get_pdn(pdev);
> +		WARN_ON(pdn->pe_number != IODA_INVALID_PE);
> +		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
> +			if (pe->rid == ((pdev->bus->number << 8) |
> +			    (pdev->devfn & 0xff))) {
> +				pdn->pe_number = pe->pe_number;
> +				pe->pdev = pdev;
> +				break;
> +			}
> +		}
> +	}
> +#endif /* CONFIG_PCI_IOV */
>  
>  	/* If we have no phb structure, try to setup a fallback based on
>  	 * the device-tree (RTAS PCI for example)
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 731777734bca..39d42f2b7a15 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -23,6 +23,7 @@ enum pnv_phb_model {
>  #define PNV_IODA_PE_BUS_ALL	(1 << 2)	/* PE has subordinate buses	*/
>  #define PNV_IODA_PE_MASTER	(1 << 3)	/* Master PE in compound case	*/
>  #define PNV_IODA_PE_SLAVE	(1 << 4)	/* Slave PE in compound case	*/
> +#define PNV_IODA_PE_VF		(1 << 5)	/* PE for one VF 		*/
>  
>  /* Data associated with a PE, including IOMMU tracking etc.. */
>  struct pnv_phb;
> @@ -34,6 +35,9 @@ struct pnv_ioda_pe {
>  	 * entire bus (& children). In the former case, pdev
>  	 * is populated, in the later case, pbus is.
>  	 */
> +#ifdef CONFIG_PCI_IOV
> +	struct pci_dev          *parent_dev;
> +#endif
>  	struct pci_dev		*pdev;
>  	struct pci_bus		*pbus;
>  
> @@ -165,6 +169,8 @@ struct pnv_phb {
>  
>  			/* PE allocation bitmap */
>  			unsigned long		*pe_alloc;
> +			/* PE allocation mutex */
> +			struct mutex		pe_alloc_mutex;
>  
>  			/* M32 & IO segment maps */
>  			unsigned int		*m32_segmap;
> @@ -179,6 +185,7 @@ struct pnv_phb {
>  			 * on the sequence of creation
>  			 */
>  			struct list_head	pe_list;
> +			struct mutex            pe_list_mutex;
>  
>  			/* Reverse map of PEs, will have to extend if
>  			 * we are to support more than 256 PEs, indexed
> 

  parent reply	other threads:[~2015-02-24  9:03 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-24  8:32 [PATCH v12 00/21] Enable SRIOV on Power8 Bjorn Helgaas
2015-02-24  8:33 ` [PATCH v12 01/21] PCI: Print more info in sriov_enable() error message Bjorn Helgaas
2015-02-24  8:33 ` [PATCH v12 02/21] PCI: Print PF SR-IOV resource that contains all VF(n) BAR space Bjorn Helgaas
2015-02-24  8:33 ` [PATCH v12 03/21] PCI: Keep individual VF BAR size in struct pci_sriov Bjorn Helgaas
2015-02-24  8:33 ` [PATCH v12 04/21] PCI: Index IOV resources in the conventional style Bjorn Helgaas
2015-02-24  8:33 ` [PATCH v12 05/21] PCI: Refresh First VF Offset and VF Stride when updating NumVFs Bjorn Helgaas
2015-02-24  8:33 ` [PATCH v12 06/21] PCI: Calculate maximum number of buses required for VFs Bjorn Helgaas
2015-02-24  8:33 ` [PATCH v12 07/21] PCI: Export pci_iov_virtfn_bus() and pci_iov_virtfn_devfn() Bjorn Helgaas
2015-02-24  8:33 ` [PATCH v12 08/21] PCI: Add pcibios_sriov_enable() and pcibios_sriov_disable() Bjorn Helgaas
2015-02-24  8:39   ` Bjorn Helgaas
2015-03-02  6:53     ` Wei Yang
2015-03-02  6:53       ` Wei Yang
2015-02-24  8:33 ` [PATCH v12 09/21] PCI: Add pcibios_iov_resource_alignment() interface Bjorn Helgaas
2015-02-24  8:34 ` [PATCH v12 10/21] PCI: Consider additional PF's IOV BAR alignment in sizing and assigning Bjorn Helgaas
2015-02-24  8:41   ` Bjorn Helgaas
2015-03-02  7:32     ` Wei Yang
2015-03-02  7:32       ` Wei Yang
2015-03-11  2:36       ` Bjorn Helgaas
2015-03-11  2:36         ` Bjorn Helgaas
2015-03-11  9:17         ` Wei Yang
2015-03-11  9:17           ` Wei Yang
2015-02-24  8:34 ` [PATCH v12 11/21] powerpc/pci: Don't unset PCI resources for VFs Bjorn Helgaas
2015-02-24  8:44   ` Bjorn Helgaas
2015-03-02  7:34     ` Wei Yang
2015-03-02  7:34       ` Wei Yang
2015-02-24  8:34 ` [PATCH v12 12/21] powerpc/pci: Refactor pci_dn Bjorn Helgaas
2015-02-24  8:34 ` [PATCH v12 13/21] powerpc/powernv: Use pci_dn, not device_node, in PCI config accessor Bjorn Helgaas
2015-02-24  8:34 ` [PATCH v12 14/21] powerpc/powernv: Allocate struct pnv_ioda_pe iommu_table dynamically Bjorn Helgaas
2015-02-24  8:46   ` Bjorn Helgaas
2015-03-02  7:50     ` Wei Yang
2015-03-02  7:50       ` Wei Yang
2015-03-02  7:56       ` Benjamin Herrenschmidt
2015-03-02  7:56         ` Benjamin Herrenschmidt
2015-03-02  8:02         ` Wei Yang
2015-03-02  8:02           ` Wei Yang
2015-03-11  2:47       ` Bjorn Helgaas
2015-03-11  2:47         ` Bjorn Helgaas
2015-03-11  6:13         ` Wei Yang
2015-03-11  6:13           ` Wei Yang
2015-02-24  8:34 ` [PATCH v12 15/21] powerpc/powernv: Reserve additional space for IOV BAR according to the number of total_pe Bjorn Helgaas
2015-02-24  8:52   ` Bjorn Helgaas
2015-03-02  7:41     ` Wei Yang
2015-03-02  7:41       ` Wei Yang
2015-03-11  2:51       ` Bjorn Helgaas
2015-03-11  2:51         ` Bjorn Helgaas
2015-03-11  6:22         ` Wei Yang
2015-03-11  6:22           ` Wei Yang
2015-03-11 13:40           ` Bjorn Helgaas
2015-03-11 13:40             ` Bjorn Helgaas
2015-02-24  8:34 ` [PATCH v12 16/21] powerpc/powernv: Implement pcibios_iov_resource_alignment() on powernv Bjorn Helgaas
2015-02-24  8:34 ` [PATCH v12 17/21] powerpc/powernv: Shift VF resource with an offset Bjorn Helgaas
2015-02-24  9:00   ` Bjorn Helgaas
2015-02-24 17:10     ` Bjorn Helgaas
2015-03-02  7:58       ` Wei Yang
2015-03-02  7:58         ` Wei Yang
2015-03-04  3:01     ` Wei Yang
2015-03-04  3:01       ` Wei Yang
2015-03-11  2:55       ` Bjorn Helgaas
2015-03-11  2:55         ` Bjorn Helgaas
2015-03-11  6:42         ` Wei Yang
2015-03-11  6:42           ` Wei Yang
2015-02-24  9:03   ` Bjorn Helgaas [this message]
2015-02-24  8:35 ` [PATCH v12 18/21] powerpc/powernv: Reserve additional space for IOV BAR, with m64_per_iov supported Bjorn Helgaas
2015-02-24  9:06   ` Bjorn Helgaas
2015-03-02  7:55     ` Wei Yang
2015-03-02  7:55       ` Wei Yang
2015-02-24  8:35 ` [PATCH v12 19/21] powerpc/powernv: Group VF PE when IOV BAR is big on PHB3 Bjorn Helgaas
2015-02-24  8:35 ` [PATCH v12 20/21] powerpc/pci: Remove unused struct pci_dn.pcidev field Bjorn Helgaas
2015-02-24  8:35 ` [PATCH v12 21/21] powerpc/pci: Add PCI resource alignment documentation Bjorn Helgaas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150224090304.GL6220@google.com \
    --to=bhelgaas@google.com \
    --cc=benh@au1.ibm.com \
    --cc=gwshan@linux.vnet.ibm.com \
    --cc=linux-pci@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=weiyang@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.