- * [PATCH 1/7] powerpc/powernv: Supports PHB3
  2013-04-24  9:37 [PATCH v3 0/7] powerpc/powernv: PHB3 Support Gavin Shan
@ 2013-04-24  9:37 ` Gavin Shan
  2013-04-24  9:37 ` [PATCH 2/7] powerpc/powernv: Retrieve IODA2 tables explicitly Gavin Shan
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Gavin Shan @ 2013-04-24  9:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
The patch initializes PHB3 during the system boot stage. The
flag "PNV_PHB_MODEL_PHB3" is introduced to differentiate the IODA2
compatible PHB3 from other types of PHBs.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   62 +++++++++++++++--------------
 arch/powerpc/platforms/powernv/pci.c      |    6 ++-
 arch/powerpc/platforms/powernv/pci.h      |    8 ++-
 3 files changed, 42 insertions(+), 34 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index a5c5f15..3d4e958 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -852,18 +852,19 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
 	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
 }
 
-void __init pnv_pci_init_ioda1_phb(struct device_node *np)
+void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 {
 	struct pci_controller *hose;
 	static int primary = 1;
 	struct pnv_phb *phb;
 	unsigned long size, m32map_off, iomap_off, pemap_off;
 	const u64 *prop64;
+	const u32 *prop32;
 	u64 phb_id;
 	void *aux;
 	long rc;
 
-	pr_info(" Initializing IODA OPAL PHB %s\n", np->full_name);
+	pr_info(" Initializing IODA%d OPAL PHB %s\n", ioda_type, np->full_name);
 
 	prop64 = of_get_property(np, "ibm,opal-phbid", NULL);
 	if (!prop64) {
@@ -890,37 +891,34 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 	hose->last_busno = 0xff;
 	hose->private_data = phb;
 	phb->opal_id = phb_id;
-	phb->type = PNV_PHB_IODA1;
+	phb->type = ioda_type;
 
 	/* Detect specific models for error handling */
 	if (of_device_is_compatible(np, "ibm,p7ioc-pciex"))
 		phb->model = PNV_PHB_MODEL_P7IOC;
+	else if (of_device_is_compatible(np, "ibm,p8-pciex"))
+		phb->model = PNV_PHB_MODEL_PHB3;
 	else
 		phb->model = PNV_PHB_MODEL_UNKNOWN;
 
-	/* We parse "ranges" now since we need to deduce the register base
-	 * from the IO base
-	 */
+	/* Parse 32-bit and IO ranges (if any) */
 	pci_process_bridge_OF_ranges(phb->hose, np, primary);
 	primary = 0;
 
-	/* Magic formula from Milton */
+	/* Get registers */
 	phb->regs = of_iomap(np, 0);
 	if (phb->regs == NULL)
 		pr_err("  Failed to map registers !\n");
 
-
-	/* XXX This is hack-a-thon. This needs to be changed so that:
-	 *  - we obtain stuff like PE# etc... from device-tree
-	 *  - we properly re-allocate M32 ourselves
-	 *    (the OFW one isn't very good)
-	 */
-
 	/* Initialize more IODA stuff */
-	phb->ioda.total_pe = 128;
+	prop32 = of_get_property(np, "ibm,opal-num-pes", NULL);
+	if (!prop32)
+		phb->ioda.total_pe = 1;
+	else
+		phb->ioda.total_pe = *prop32;
 
 	phb->ioda.m32_size = resource_size(&hose->mem_resources[0]);
-	/* OFW Has already off top 64k of M32 space (MSI space) */
+	/* FW Has already off top 64k of M32 space (MSI space) */
 	phb->ioda.m32_size += 0x10000;
 
 	phb->ioda.m32_segsize = phb->ioda.m32_size / phb->ioda.total_pe;
@@ -930,7 +928,10 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 	phb->ioda.io_segsize = phb->ioda.io_size / phb->ioda.total_pe;
 	phb->ioda.io_pci_base = 0; /* XXX calculate this ? */
 
-	/* Allocate aux data & arrays */
+	/* Allocate aux data & arrays
+	 *
+	 * XXX TODO: Don't allocate io segmap on PHB3
+	 */
 	size = _ALIGN_UP(phb->ioda.total_pe / 8, sizeof(unsigned long));
 	m32map_off = size;
 	size += phb->ioda.total_pe * sizeof(phb->ioda.m32_segmap[0]);
@@ -960,7 +961,7 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 	hose->mem_resources[2].start = 0;
 	hose->mem_resources[2].end = 0;
 
-#if 0
+#if 0 /* We should really do that ... */
 	rc = opal_pci_set_phb_mem_window(opal->phb_id,
 					 window_type,
 					 window_num,
@@ -974,16 +975,6 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 		phb->ioda.m32_size, phb->ioda.m32_segsize,
 		phb->ioda.io_size, phb->ioda.io_segsize);
 
-	if (phb->regs)  {
-		pr_devel(" BUID     = 0x%016llx\n", in_be64(phb->regs + 0x100));
-		pr_devel(" PHB2_CR  = 0x%016llx\n", in_be64(phb->regs + 0x160));
-		pr_devel(" IO_BAR   = 0x%016llx\n", in_be64(phb->regs + 0x170));
-		pr_devel(" IO_BAMR  = 0x%016llx\n", in_be64(phb->regs + 0x178));
-		pr_devel(" IO_SAR   = 0x%016llx\n", in_be64(phb->regs + 0x180));
-		pr_devel(" M32_BAR  = 0x%016llx\n", in_be64(phb->regs + 0x190));
-		pr_devel(" M32_BAMR = 0x%016llx\n", in_be64(phb->regs + 0x198));
-		pr_devel(" M32_SAR  = 0x%016llx\n", in_be64(phb->regs + 0x1a0));
-	}
 	phb->hose->ops = &pnv_pci_ops;
 
 	/* Setup RID -> PE mapping function */
@@ -1011,7 +1002,18 @@ void __init pnv_pci_init_ioda1_phb(struct device_node *np)
 	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
 	if (rc)
 		pr_warning("  OPAL Error %ld performing IODA table reset !\n", rc);
-	opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
+
+	/*
+	 * On IODA1 map everything to PE#0, on IODA2 we assume the IODA reset
+	 * has cleared the RTT which has the same effect
+	 */
+	if (ioda_type == PNV_PHB_IODA1)
+		opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
+}
+
+void pnv_pci_init_ioda2_phb(struct device_node *np)
+{
+	pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2);
 }
 
 void __init pnv_pci_init_ioda_hub(struct device_node *np)
@@ -1034,6 +1036,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
 	for_each_child_of_node(np, phbn) {
 		/* Look for IODA1 PHBs */
 		if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
-			pnv_pci_init_ioda1_phb(phbn);
+			pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1);
 	}
 }
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 42eee93..a11b5a6 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -492,7 +492,7 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
 		pnv_pci_dma_fallback_setup(hose, pdev);
 }
 
-/* Fixup wrong class code in p7ioc root complex */
+/* Fixup wrong class code in p7ioc and p8 root complex */
 static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
 {
 	dev->class = PCI_CLASS_BRIDGE_PCI << 8;
@@ -558,6 +558,10 @@ void __init pnv_pci_init(void)
 		if (!found_ioda)
 			for_each_compatible_node(np, NULL, "ibm,p5ioc2")
 				pnv_pci_init_p5ioc2_hub(np);
+
+		/* Look for ioda2 built-in PHB3's */
+		for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
+			pnv_pci_init_ioda2_phb(np);
 	}
 
 	/* Setup the linkage between OF nodes and PHBs */
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 42ddfba..f6314d6 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -4,9 +4,9 @@
 struct pci_dn;
 
 enum pnv_phb_type {
-	PNV_PHB_P5IOC2,
-	PNV_PHB_IODA1,
-	PNV_PHB_IODA2,
+	PNV_PHB_P5IOC2	= 0,
+	PNV_PHB_IODA1	= 1,
+	PNV_PHB_IODA2	= 2,
 };
 
 /* Precise PHB model for error management */
@@ -14,6 +14,7 @@ enum pnv_phb_model {
 	PNV_PHB_MODEL_UNKNOWN,
 	PNV_PHB_MODEL_P5IOC2,
 	PNV_PHB_MODEL_P7IOC,
+	PNV_PHB_MODEL_PHB3,
 };
 
 #define PNV_PCI_DIAG_BUF_SIZE	4096
@@ -148,6 +149,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 				      u64 dma_offset);
 extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
+extern void pnv_pci_init_ioda2_phb(struct device_node *np);
 
 
 #endif /* __POWERNV_PCI_H */
-- 
1.7.5.4
^ permalink raw reply related	[flat|nested] 15+ messages in thread
- * [PATCH 2/7] powerpc/powernv: Retrieve IODA2 tables explicitly
  2013-04-24  9:37 [PATCH v3 0/7] powerpc/powernv: PHB3 Support Gavin Shan
  2013-04-24  9:37 ` [PATCH 1/7] powerpc/powernv: Supports PHB3 Gavin Shan
@ 2013-04-24  9:37 ` Gavin Shan
  2013-04-24  9:37 ` [PATCH 3/7] powerpc/powernv: Add option CONFIG_POWERNV_MSI Gavin Shan
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Gavin Shan @ 2013-04-24  9:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
The PHB3, which is compatible with IODA2, has lots of tables (RTT/
PELTV/PEST/IVT/RBA) in system memory and has corresponding BARs to
track their system memory addresses. The tables have been allocated by
firmware and exported through the device-tree. The patch retrieves the
tables explicitly.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal.h           |    5 +--
 arch/powerpc/platforms/powernv/pci-ioda.c |   35 +++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/pci.h      |   13 ++++++++++
 3 files changed, 50 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a4b28f1..0af7ba0 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -491,9 +491,8 @@ int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number,
 				    uint16_t window_type, uint16_t window_num,
 				    uint16_t segment_num);
 int64_t opal_pci_set_phb_table_memory(uint64_t phb_id, uint64_t rtt_addr,
-				      uint64_t ivt_addr, uint64_t ivt_len,
-				      uint64_t reject_array_addr,
-				      uint64_t peltv_addr);
+				      uint64_t peltv_addr, uint64_t pest_addr,
+				      uint64_t ivt_addr, uint64_t rba_addr);
 int64_t opal_pci_set_pe(uint64_t phb_id, uint64_t pe_number, uint64_t bus_dev_func,
 			uint8_t bus_compare, uint8_t dev_compare, uint8_t func_compare,
 			uint8_t pe_action);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3d4e958..0c15870 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -852,6 +852,23 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
 	return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
 }
 
+static void __init pnv_pci_get_ioda2_table(struct device_node *np,
+					   const char *name,
+					   void **table,
+					   unsigned int *len)
+{
+	const u32 *prop32;
+	u64 base;
+
+	prop32 = of_get_property(np, name, NULL);
+	if (prop32) {
+		base = be32_to_cpup(prop32);
+		base = base << 32 | be32_to_cpup(prop32 + 1);
+		*table = __va(base);
+		*len = be32_to_cpup(prop32 + 2);
+	}
+}
+
 void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 {
 	struct pci_controller *hose;
@@ -998,6 +1015,24 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
 	ppc_md.pcibios_window_alignment = pnv_pci_window_alignment;
 	pci_add_flags(PCI_REASSIGN_ALL_RSRC);
 
+	/* Retrieve variable IODA2 tables */
+	if (ioda_type == PNV_PHB_IODA2) {
+		pnv_pci_get_ioda2_table(np, "ibm,opal-rtt-table",
+				&phb->ioda.tbl_rtt, &phb->ioda.rtt_len);
+		pnv_pci_get_ioda2_table(np, "ibm,opal-peltv-table",
+				&phb->ioda.tbl_peltv, &phb->ioda.peltv_len);
+		pnv_pci_get_ioda2_table(np, "ibm,opal-pest-table",
+				&phb->ioda.tbl_pest, &phb->ioda.pest_len);
+		pnv_pci_get_ioda2_table(np, "ibm,opal-ivt-table",
+				&phb->ioda.tbl_ivt, &phb->ioda.ivt_len);
+		pnv_pci_get_ioda2_table(np, "ibm,opal-rba-table",
+				&phb->ioda.tbl_rba, &phb->ioda.rba_len);
+		/* Get IVE stride */
+		prop32 = of_get_property(np, "ibm,opal-ive-stride", NULL);
+		if (prop32)
+			phb->ioda.ive_stride = be32_to_cpup(prop32);
+	}
+
 	/* Reset IODA tables to a clean state */
 	rc = opal_pci_reset(phb_id, OPAL_PCI_IODA_TABLE_RESET, OPAL_ASSERT_RESET);
 	if (rc)
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index f6314d6..c048c29 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -100,6 +100,19 @@ struct pnv_phb {
 			unsigned int		io_segsize;
 			unsigned int		io_pci_base;
 
+			/* Variable tables for IODA2 */
+			void			*tbl_rtt;
+			void			*tbl_peltv;
+			void			*tbl_pest;
+			void			*tbl_ivt;
+			void			*tbl_rba;
+			unsigned int		ive_stride;
+			unsigned int		rtt_len;
+			unsigned int		peltv_len;
+			unsigned int		pest_len;
+			unsigned int		ivt_len;
+			unsigned int		rba_len;
+
 			/* PE allocation bitmap */
 			unsigned long		*pe_alloc;
 
-- 
1.7.5.4
^ permalink raw reply related	[flat|nested] 15+ messages in thread
- * [PATCH 3/7] powerpc/powernv: Add option CONFIG_POWERNV_MSI
  2013-04-24  9:37 [PATCH v3 0/7] powerpc/powernv: PHB3 Support Gavin Shan
  2013-04-24  9:37 ` [PATCH 1/7] powerpc/powernv: Supports PHB3 Gavin Shan
  2013-04-24  9:37 ` [PATCH 2/7] powerpc/powernv: Retrieve IODA2 tables explicitly Gavin Shan
@ 2013-04-24  9:37 ` Gavin Shan
  2013-04-24  9:37 ` [PATCH 4/7] powerpc/powernv: Patch MSI EOI handler on P8 Gavin Shan
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 15+ messages in thread
From: Gavin Shan @ 2013-04-24  9:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
As Michael Ellerman suggested, add CONFIG_POWERNV_MSI for the PowerNV
platform. That's similar to CONFIG_PSERIES_MSI for the pSeries platform.
For now, we don't make it dependent on CONFIG_EEH since that is not ready
to be enabled yet.
Apart from that, we also enable CONFIG_PPC_MSI_BITMAP when
CONFIG_POWERNV_MSI is selected.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/Kconfig |    5 +++++
 arch/powerpc/sysdev/Kconfig            |    1 +
 2 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 74fea5c..d3e840d 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -8,6 +8,11 @@ config PPC_POWERNV
 	select PPC_PCI_CHOICE if EMBEDDED
 	default y
 
+config POWERNV_MSI
+	bool "Support PCI MSI on PowerNV platform"
+	depends on PCI_MSI
+	default y
+
 config PPC_POWERNV_RTAS
 	depends on PPC_POWERNV
 	bool "Support for RTAS based PowerNV platforms such as BML"
diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig
index a84fecf..ab4cb54 100644
--- a/arch/powerpc/sysdev/Kconfig
+++ b/arch/powerpc/sysdev/Kconfig
@@ -19,6 +19,7 @@ config PPC_MSI_BITMAP
 	default y if MPIC
 	default y if FSL_PCI
 	default y if PPC4xx_MSI
+	default y if POWERNV_MSI
 
 source "arch/powerpc/sysdev/xics/Kconfig"
 
-- 
1.7.5.4
^ permalink raw reply related	[flat|nested] 15+ messages in thread
- * [PATCH 4/7] powerpc/powernv: Patch MSI EOI handler on P8
  2013-04-24  9:37 [PATCH v3 0/7] powerpc/powernv: PHB3 Support Gavin Shan
                   ` (2 preceding siblings ...)
  2013-04-24  9:37 ` [PATCH 3/7] powerpc/powernv: Add option CONFIG_POWERNV_MSI Gavin Shan
@ 2013-04-24  9:37 ` Gavin Shan
  2013-04-24 20:49   ` Benjamin Herrenschmidt
  2013-04-24  9:37 ` [PATCH 5/7] powerpc/powernv: TCE invalidation for PHB3 Gavin Shan
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 15+ messages in thread
From: Gavin Shan @ 2013-04-24  9:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
The EOI handler of MSI/MSI-X interrupts for P8 (PHB3) needs additional
steps to handle the P/Q bits in the IVE before EOIing the corresponding
interrupt. The patch changes the EOI handler to cover that.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/opal.h                |    2 +
 arch/powerpc/include/asm/xics.h                |    3 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |    1 +
 arch/powerpc/platforms/powernv/pci-ioda.c      |   16 ++++++++++++++
 arch/powerpc/platforms/powernv/pci.c           |   19 ++++++++++++++++
 arch/powerpc/platforms/powernv/pci.h           |    1 +
 arch/powerpc/sysdev/xics/icp-native.c          |   27 +++++++++++++++++++++++-
 7 files changed, 68 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0af7ba0..93dad52 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -117,6 +117,7 @@ extern int opal_enter_rtas(struct rtas_args *args,
 #define OPAL_SET_SLOT_LED_STATUS		55
 #define OPAL_GET_EPOW_STATUS			56
 #define OPAL_SET_SYSTEM_ATTENTION_LED		57
+#define OPAL_PCI_MSI_EOI			63
 
 #ifndef __ASSEMBLY__
 
@@ -505,6 +506,7 @@ int64_t opal_pci_get_xive_reissue(uint64_t phb_id, uint32_t xive_number,
 				  uint8_t *p_bit, uint8_t *q_bit);
 int64_t opal_pci_set_xive_reissue(uint64_t phb_id, uint32_t xive_number,
 				  uint8_t p_bit, uint8_t q_bit);
+int64_t opal_pci_msi_eoi(uint64_t phb_id, uint32_t ive_number);
 int64_t opal_pci_set_xive_pe(uint64_t phb_id, uint32_t pe_number,
 			     uint32_t xive_num);
 int64_t opal_get_xive_source(uint64_t phb_id, uint32_t xive_num,
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 4ae9a09..c4b364b 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -72,6 +72,9 @@ extern int ics_opal_init(void);
 static inline int ics_opal_init(void) { return -ENODEV; }
 #endif
 
+/* Extra EOI handler for PHB3 */
+extern int pnv_pci_msi_eoi(unsigned int hw_irq);
+
 /* ICS instance, hooked up to chip_data of an irq */
 struct ics {
 	struct list_head link;
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3bb07e5..6fabe92 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -107,3 +107,4 @@ OPAL_CALL(opal_pci_mask_pe_error,		OPAL_PCI_MASK_PE_ERROR);
 OPAL_CALL(opal_set_slot_led_status,		OPAL_SET_SLOT_LED_STATUS);
 OPAL_CALL(opal_get_epow_status,			OPAL_GET_EPOW_STATUS);
 OPAL_CALL(opal_set_system_attention_led,	OPAL_SET_SYSTEM_ATTENTION_LED);
+OPAL_CALL(opal_pci_msi_eoi,			OPAL_PCI_MSI_EOI);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 0c15870..32197af 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -646,6 +646,20 @@ static int pnv_pci_ioda_msi_setup(struct pnv_phb *phb, struct pci_dev *dev,
 	return 0;
 }
 
+static int pnv_pci_ioda_msi_eoi(struct pnv_phb *phb, unsigned int hw_irq)
+{
+	long rc;
+
+	rc = opal_pci_msi_eoi(phb->opal_id, hw_irq - phb->msi_base);
+	if (rc) {
+		pr_warning("%s: Failed to EOI IRQ#%d on PHB#%d, rc=%ld\n",
+			   __func__, hw_irq, phb->hose->global_number, rc);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
 {
 	unsigned int count;
@@ -667,6 +681,8 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
 	}
 
 	phb->msi_setup = pnv_pci_ioda_msi_setup;
+	if (phb->type == PNV_PHB_IODA2)
+		phb->msi_eoi = pnv_pci_ioda_msi_eoi;
 	phb->msi32_support = 1;
 	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
 		count, phb->msi_base);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index a11b5a6..ea6a93d 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -115,6 +115,25 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
 		irq_dispose_mapping(entry->irq);
 	}
 }
+
+int pnv_pci_msi_eoi(unsigned int hw_irq)
+{
+	struct pci_controller *hose, *tmp;
+	struct pnv_phb *phb = NULL;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		phb = hose->private_data;
+		if (hw_irq >= phb->msi_base &&
+		    hw_irq < phb->msi_base + phb->msi_bmp.irq_count) {
+			if (!phb->msi_eoi)
+				return -EEXIST;
+			return phb->msi_eoi(phb, hw_irq);
+		}
+	}
+
+	/* For LSI interrupts, we needn't do it */
+	return 0;
+}
 #endif /* CONFIG_PCI_MSI */
 
 static void pnv_pci_dump_p7ioc_diag_data(struct pnv_phb *phb)
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index c048c29..c6690b3 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -81,6 +81,7 @@ struct pnv_phb {
 	int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
 			 unsigned int hwirq, unsigned int is_64,
 			 struct msi_msg *msg);
+	int (*msi_eoi)(struct pnv_phb *phb, unsigned int hw_irq);
 	void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
 	void (*fixup_phb)(struct pci_controller *hose);
 	u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
diff --git a/arch/powerpc/sysdev/xics/icp-native.c b/arch/powerpc/sysdev/xics/icp-native.c
index 48861d3..38dd2b1 100644
--- a/arch/powerpc/sysdev/xics/icp-native.c
+++ b/arch/powerpc/sysdev/xics/icp-native.c
@@ -89,6 +89,22 @@ static void icp_native_eoi(struct irq_data *d)
 	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
 }
 
+static void icp_p8_native_eoi(struct irq_data *d)
+{
+	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
+	int ret;
+
+	/* Let firmware handle P/Q bits */
+	if (hw_irq != XICS_IPI) {
+		ret = pnv_pci_msi_eoi(hw_irq);
+		WARN_ON_ONCE(ret);
+	}
+
+	/* EOI on ICP */
+	iosync();
+	icp_native_set_xirr((xics_pop_cppr() << 24) | hw_irq);
+}
+
 static void icp_native_teardown_cpu(void)
 {
 	int cpu = smp_processor_id();
@@ -264,7 +280,7 @@ static int __init icp_native_init_one_node(struct device_node *np,
 	return 0;
 }
 
-static const struct icp_ops icp_native_ops = {
+static struct icp_ops icp_native_ops = {
 	.get_irq	= icp_native_get_irq,
 	.eoi		= icp_native_eoi,
 	.set_priority	= icp_native_set_cpu_priority,
@@ -296,6 +312,15 @@ int __init icp_native_init(void)
 	if (found == 0)
 		return -ENODEV;
 
+	/* Change the EOI handler for P8 */
+#ifdef CONFIG_POWERNV_MSI
+	np = of_find_compatible_node(NULL, NULL, "ibm,power8-xicp");
+	if (np) {
+		icp_native_ops.eoi = icp_p8_native_eoi;
+		of_node_put(np);
+	}
+#endif
+
 	icp_ops = &icp_native_ops;
 
 	return 0;
-- 
1.7.5.4
^ permalink raw reply related	[flat|nested] 15+ messages in thread
- * Re: [PATCH 4/7] powerpc/powernv: Patch MSI EOI handler on P8
  2013-04-24  9:37 ` [PATCH 4/7] powerpc/powernv: Patch MSI EOI handler on P8 Gavin Shan
@ 2013-04-24 20:49   ` Benjamin Herrenschmidt
  2013-04-25  8:08     ` Gavin Shan
  0 siblings, 1 reply; 15+ messages in thread
From: Benjamin Herrenschmidt @ 2013-04-24 20:49 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linuxppc-dev
On Wed, 2013-04-24 at 17:37 +0800, Gavin Shan wrote:
> The EOI handler of MSI/MSI-X interrupts for P8 (PHB3) need additional
> steps to handle the P/Q bits in IVE before EOIing the corresponding
> interrupt. The patch changes the EOI handler to cover that.
 .../...
>  static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>  {
>  	unsigned int count;
> @@ -667,6 +681,8 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>  	}
>  
>  	phb->msi_setup = pnv_pci_ioda_msi_setup;
> +	if (phb->type == PNV_PHB_IODA2)
> +		phb->msi_eoi = pnv_pci_ioda_msi_eoi;
Ouch, another function pointer call in a hot path...
>  	phb->msi32_support = 1;
>  	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
>  		count, phb->msi_base);
> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
> index a11b5a6..ea6a93d 100644
> --- a/arch/powerpc/platforms/powernv/pci.c
> +++ b/arch/powerpc/platforms/powernv/pci.c
> @@ -115,6 +115,25 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
>  		irq_dispose_mapping(entry->irq);
>  	}
>  }
> +
> +int pnv_pci_msi_eoi(unsigned int hw_irq)
> +{
> +	struct pci_controller *hose, *tmp;
> +	struct pnv_phb *phb = NULL;
> +
> +	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
> +		phb = hose->private_data;
> +		if (hw_irq >= phb->msi_base &&
> +		    hw_irq < phb->msi_base + phb->msi_bmp.irq_count) {
> +			if (!phb->msi_eoi)
> +				return -EEXIST;
> +			return phb->msi_eoi(phb, hw_irq);
> +		}
> +	}
> +
> +	/* For LSI interrupts, we needn't do it */
> +	return 0;
> +}
And a list walk ... that's not right.
Also, you do it for all XICS interrupts, including the non-PCI ones, the
LSIs, etc... only to figure out that some might not be MSIs later in
the loop.
Why not instead look at changing the irq_chip for the MSIs ?
IE. When setting up the MSIs for IODA2, use a different irq_chip which
is a copy of the original one with a different ->eoi callback, which
does the original xics eoi and then the OPAL stuff ?
You might even be able to use something like container_of to get back
to the struct phb, no need to iterate them all.
Cheers,
Ben.
^ permalink raw reply	[flat|nested] 15+ messages in thread
- * Re: [PATCH 4/7] powerpc/powernv: Patch MSI EOI handler on P8
  2013-04-24 20:49   ` Benjamin Herrenschmidt
@ 2013-04-25  8:08     ` Gavin Shan
  2013-04-25  8:13       ` Gavin Shan
  0 siblings, 1 reply; 15+ messages in thread
From: Gavin Shan @ 2013-04-25  8:08 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Gavin Shan
On Thu, Apr 25, 2013 at 06:49:40AM +1000, Benjamin Herrenschmidt wrote:
>On Wed, 2013-04-24 at 17:37 +0800, Gavin Shan wrote:
>> The EOI handler of MSI/MSI-X interrupts for P8 (PHB3) need additional
>> steps to handle the P/Q bits in IVE before EOIing the corresponding
>> interrupt. The patch changes the EOI handler to cover that.
>
> .../...
>
>>  static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>>  {
>>  	unsigned int count;
>> @@ -667,6 +681,8 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
>>  	}
>>  
>>  	phb->msi_setup = pnv_pci_ioda_msi_setup;
>> +	if (phb->type == PNV_PHB_IODA2)
>> +		phb->msi_eoi = pnv_pci_ioda_msi_eoi;
>
>Ouch, another function pointer call in a hot path...
>
Yeah. I've removed it in the next version (not sent out yet) :-)
>>  	phb->msi32_support = 1;
>>  	pr_info("  Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
>>  		count, phb->msi_base);
>> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>> index a11b5a6..ea6a93d 100644
>> --- a/arch/powerpc/platforms/powernv/pci.c
>> +++ b/arch/powerpc/platforms/powernv/pci.c
>> @@ -115,6 +115,25 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
>>  		irq_dispose_mapping(entry->irq);
>>  	}
>>  }
>> +
>> +int pnv_pci_msi_eoi(unsigned int hw_irq)
>> +{
>> +	struct pci_controller *hose, *tmp;
>> +	struct pnv_phb *phb = NULL;
>> +
>> +	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
>> +		phb = hose->private_data;
>> +		if (hw_irq >= phb->msi_base &&
>> +		    hw_irq < phb->msi_base + phb->msi_bmp.irq_count) {
>> +			if (!phb->msi_eoi)
>> +				return -EEXIST;
>> +			return phb->msi_eoi(phb, hw_irq);
>> +		}
>> +	}
>> +
>> +	/* For LSI interrupts, we needn't do it */
>> +	return 0;
>> +}
>
>And a list walk ... that's not right.
>
>Also, you do it for all XICS interrupts, including the non-PCI ones, the
>LSIs, etc... only to figure out that some might not be MSIs later in
>the loop.
>
>Why not instead look at changing the irq_chip for the MSIs ?
>
>IE. When setting up the MSIs for IODA2, use a different irq_chip which
>is a copy of the original one with a different ->eoi callback, which
>does the original xics eoi and then the OPAL stuff ?
>
>You might even be able to use something like container_of to get back
>to the struct phb, no need to iterate them all.
>
Thanks for the detailed explanation, Ben.
I found irq_data hasn't been fully utilized until now. I already
have code that starts to use it. Firstly, "irq_data" is set to the PHB OPAL ID
or an invalid value (0xffs) during the mapping stage (there, we call irq_set_chip_data()
to track the PHB OPAL ID or invalid value). Before EOIing the interrupt, we
will check "irq_data" and do special handling of the P/Q bits if it has a valid value.
With that, the "hot" path should be fast enough and the function pointer (mentioned
above) can be removed.
Thanks,
Gavin
^ permalink raw reply	[flat|nested] 15+ messages in thread
- * Re: [PATCH 4/7] powerpc/powernv: Patch MSI EOI handler on P8
  2013-04-25  8:08     ` Gavin Shan
@ 2013-04-25  8:13       ` Gavin Shan
  2013-04-25  8:47         ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 15+ messages in thread
From: Gavin Shan @ 2013-04-25  8:13 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linuxppc-dev
On Thu, Apr 25, 2013 at 04:08:37PM +0800, Gavin Shan wrote:
>On Thu, Apr 25, 2013 at 06:49:40AM +1000, Benjamin Herrenschmidt wrote:
>>On Wed, 2013-04-24 at 17:37 +0800, Gavin Shan wrote:
>>> The EOI handler of MSI/MSI-X interrupts for P8 (PHB3) need additional
>>> steps to handle the P/Q bits in IVE before EOIing the corresponding
>>> interrupt. The patch changes the EOI handler to cover that.
>>
>> .../...
>>
.../...
>>> diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
>>> index a11b5a6..ea6a93d 100644
>>> --- a/arch/powerpc/platforms/powernv/pci.c
>>> +++ b/arch/powerpc/platforms/powernv/pci.c
>>> @@ -115,6 +115,25 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev)
>>>  		irq_dispose_mapping(entry->irq);
>>>  	}
>>>  }
>>> +
>>> +int pnv_pci_msi_eoi(unsigned int hw_irq)
>>> +{
>>> +	struct pci_controller *hose, *tmp;
>>> +	struct pnv_phb *phb = NULL;
>>> +
>>> +	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
>>> +		phb = hose->private_data;
>>> +		if (hw_irq >= phb->msi_base &&
>>> +		    hw_irq < phb->msi_base + phb->msi_bmp.irq_count) {
>>> +			if (!phb->msi_eoi)
>>> +				return -EEXIST;
>>> +			return phb->msi_eoi(phb, hw_irq);
>>> +		}
>>> +	}
>>> +
>>> +	/* For LSI interrupts, we needn't do it */
>>> +	return 0;
>>> +}
>>
>>And a list walk ... that's not right.
>>
>>Also, you do it for all XICS interrupts, including the non-PCI ones, the
>>LSIs, etc... only to figure out that some might not be MSIs later in
>>the loop.
>>
>>Why not instead look at changing the irq_chip for the MSIs ?
>>
>>IE. When setting up the MSIs for IODA2, use a different irq_chip which
>>is a copy of the original one with a different ->eoi callback, which
>>does the original xics eoi and then the OPAL stuff ?
>>
>>You might even be able to use something like container_of to get back
>>to the struct phb, no need to iterate them all.
>>
>
>Thanks for the detailed explaining, Ben.
>
>I found irq_data hasn't been fully utilized until this moment. I already
>have code to start use that. Firstly, "irq_data" is set to the PHB OPAL ID
>or invalid value (0xffs) during mapping stage (there, we call irq_set_chip_data()
>to trace the PHB OPAL ID or invalid value). Before EOIing the interrupt, we
>will check "irq_data" and do special handling on P/Q bits if it has valid value.
>With it, the "hot" path should be fast enough and the function pointer (mentioned
>above) can be removed.
>
It should be "chip_data" (not "irq_data"). Hopefully, you haven't
had time to read the reply yet. Otherwise, it would be a bit confusing ;-)
Thanks,
Gavin
^ permalink raw reply	[flat|nested] 15+ messages in thread
- * Re: [PATCH 4/7] powerpc/powernv: Patch MSI EOI handler on P8
  2013-04-25  8:13       ` Gavin Shan
@ 2013-04-25  8:47         ` Benjamin Herrenschmidt
  2013-04-25 11:58           ` Gavin Shan
  0 siblings, 1 reply; 15+ messages in thread
From: Benjamin Herrenschmidt @ 2013-04-25  8:47 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linuxppc-dev
On Thu, 2013-04-25 at 16:13 +0800, Gavin Shan wrote:
> It should be "chip_data" (not "irq_data"). Hopefully, you haven't
> get time to see the reply. Otherwise, it would a bit confused ;-)
Doesn't ics-opal already use chip_data ?
I was thinking just duplicating the irq_chip (including chip_data) so it
can be used by ics-opal just fine for all calls, just then overriding
the eoi callback and using container_of to get to the PHB.
Any reason that wouldn't work ?
Cheers,
Ben.
^ permalink raw reply	[flat|nested] 15+ messages in thread 
- * Re: [PATCH 4/7] powerpc/powernv: Patch MSI EOI handler on P8
  2013-04-25  8:47         ` Benjamin Herrenschmidt
@ 2013-04-25 11:58           ` Gavin Shan
  0 siblings, 0 replies; 15+ messages in thread
From: Gavin Shan @ 2013-04-25 11:58 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Gavin Shan
On Thu, Apr 25, 2013 at 06:47:58PM +1000, Benjamin Herrenschmidt wrote:
>On Thu, 2013-04-25 at 16:13 +0800, Gavin Shan wrote:
>> It should be "chip_data" (not "irq_data"). Hopefully, you haven't
>> had time to see the reply yet. Otherwise, it would be a bit confusing ;-)
>
>Doesn't ics-opal already use chip_data ?
>
Yeah, Ben, that is already in use. So we can't use it for other purposes :-)
>I was thinking just duplicating the irq_chip (including chip_data) so it
>can be used by ics-opal just fine for all calls, just then overriding
>the eoi callback and using container_of to get to the PHB.
>
>Any reason that wouldn't work ?
>
It should work. I already have the code (with your idea implemented) and have
verified it on the simulator. I'll send the next version (together with the f/w
changes) for review once it works correctly on a real hardware box.
(I hope that can be done as early as possible to catch the 3.10 merge window).
Thanks,
Gavin
^ permalink raw reply	[flat|nested] 15+ messages in thread 
 
 
 
 
 
- * [PATCH 5/7] powerpc/powernv: TCE invalidation for PHB3
  2013-04-24  9:37 [PATCH v3 0/7] powerpc/powernv: PHB3 Support Gavin Shan
                   ` (3 preceding siblings ...)
  2013-04-24  9:37 ` [PATCH 4/7] powerpc/powernv: Patch MSI EOI handler on P8 Gavin Shan
@ 2013-04-24  9:37 ` Gavin Shan
  2013-04-24 20:52   ` Benjamin Herrenschmidt
  2013-04-24  9:37 ` [PATCH 6/7] powerpc/powernv: Build DMA space for PE on PHB3 Gavin Shan
  2013-04-24  9:37 ` [PATCH 7/7] powerpc/powernv: Fix invalid IOMMU table Gavin Shan
  6 siblings, 1 reply; 15+ messages in thread
From: Gavin Shan @ 2013-04-24  9:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
The TCE should be invalidated whenever it is created or freed. The
approaches to do that for IODA1 and IODA2 compliant PHBs are different,
so the patch differentiates them by calling separate functions for
IODA1 and IODA2 compliant PHBs. It's notable that the
PCI address is used to invalidate the corresponding TCE on IODA2
compliant PHB3.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/iommu.h            |    1 +
 arch/powerpc/platforms/powernv/pci-ioda.c   |   75 ++++++++++++++++++++++++++-
 arch/powerpc/platforms/powernv/pci-p5ioc2.c |    1 +
 arch/powerpc/platforms/powernv/pci.c        |   60 +++++----------------
 arch/powerpc/platforms/powernv/pci.h        |    6 ++-
 5 files changed, 93 insertions(+), 50 deletions(-)
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index cbfe678..0db308e 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -76,6 +76,7 @@ struct iommu_table {
 	struct iommu_pool large_pool;
 	struct iommu_pool pools[IOMMU_NR_POOLS];
 	unsigned long *it_map;       /* A simple allocation bitmap for now */
+	void *sysdata;
 };
 
 struct scatterlist;
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 32197af..9f4d323 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -448,6 +448,73 @@ static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
 	}
 }
 
+void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
+				  u64 *startp, u64 *endp)
+{
+	u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+	unsigned long start, end, inc;
+
+	start = __pa(startp);
+	end = __pa(endp);
+
+	/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
+	if (tbl->it_busno) {
+		start <<= 12;
+		end <<= 12;
+		inc = 128 << 12;
+		start |= tbl->it_busno;
+		end |= tbl->it_busno;
+	} else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
+		/* p7ioc-style invalidation, 2 TCEs per write */
+		start |= (1ull << 63);
+		end |= (1ull << 63);
+		inc = 16;
+        } else {
+		/* Default (older HW) */
+                inc = 128;
+	}
+
+        end |= inc - 1;	/* round up end to be different than start */
+
+        mb(); /* Ensure above stores are visible */
+        while (start <= end) {
+                __raw_writeq(start, invalidate);
+                start += inc;
+        }
+
+	/*
+	 * The iommu layer will do another mb() for us on build()
+	 * and we don't care on free()
+	 */
+}
+
+void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+				  u64 *startp, u64 *endp)
+{
+	unsigned long start, end, inc;
+	u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
+	struct pnv_ioda_pe *pe = container_of(tbl, struct pnv_ioda_pe,
+					      tce32_table);
+
+	/* We'll invalidate DMA address in PE scope */
+	start = 0x2ul << 60;
+	start |= (pe->pe_number & 0xFF);
+	end = start;
+
+	/* Figure out the start, end and step */
+	inc = tbl->it_offset + (((u64)startp - tbl->it_base) / sizeof(u64));
+	start |= (inc << 12);
+	inc = tbl->it_offset + (((u64)endp - tbl->it_base) / sizeof(u64));
+	end |= (inc << 12);
+	inc = (0x1ul << 12);
+	mb();
+
+	while (start <= end) {
+		__raw_writeq(start, invalidate);
+		start += inc;
+	}
+}
+
 static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 				      struct pnv_ioda_pe *pe, unsigned int base,
 				      unsigned int segs)
@@ -509,6 +576,9 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 	pnv_pci_setup_iommu_table(tbl, addr, TCE32_TABLE_SIZE * segs,
 				  base << 28);
 
+	/* Hook the IOMMU table to PHB */
+	tbl->sysdata = phb;
+
 	/* OPAL variant of P7IOC SW invalidated TCEs */
 	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
 	if (swinvp) {
@@ -519,8 +589,9 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 		 */
 		tbl->it_busno = 0;
 		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
-		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE
-			| TCE_PCI_SWINV_PAIR;
+		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
+		if (phb->type == PNV_PHB_IODA1)
+			tbl->it_type |= TCE_PCI_SWINV_PAIR;
 	}
 	iommu_init_table(tbl, phb->hose->node);
 
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index d5c066e..177ef26 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -167,6 +167,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
 
 	/* Setup TCEs */
 	phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup;
+	phb->p5ioc2.iommu_table.sysdata = phb;
 	pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table,
 				  tce_mem, tce_size, 0);
 }
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index ea6a93d..f140c7a 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -348,52 +348,11 @@ struct pci_ops pnv_pci_ops = {
 	.write = pnv_pci_write_config,
 };
 
-
-static void pnv_tce_invalidate(struct iommu_table *tbl,
-			       u64 *startp, u64 *endp)
-{
-	u64 __iomem *invalidate = (u64 __iomem *)tbl->it_index;
-	unsigned long start, end, inc;
-
-	start = __pa(startp);
-	end = __pa(endp);
-
-
-	/* BML uses this case for p6/p7/galaxy2: Shift addr and put in node */
-	if (tbl->it_busno) {
-		start <<= 12;
-		end <<= 12;
-		inc = 128 << 12;
-		start |= tbl->it_busno;
-		end |= tbl->it_busno;
-	}
-	/* p7ioc-style invalidation, 2 TCEs per write */
-	else if (tbl->it_type & TCE_PCI_SWINV_PAIR) {
-		start |= (1ull << 63);
-		end |= (1ull << 63);
-		inc = 16;
-	}
-	/* Default (older HW) */
-	else
-		inc = 128;
-
-	end |= inc - 1;		/* round up end to be different than start */
-
-	mb(); /* Ensure above stores are visible */
-	while (start <= end) {
-		__raw_writeq(start, invalidate);
-		start += inc;
-	}
-	/* The iommu layer will do another mb() for us on build() and
-	 * we don't care on free()
-	 */
-}
-
-
 static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 			 unsigned long uaddr, enum dma_data_direction direction,
 			 struct dma_attrs *attrs)
 {
+	struct pnv_phb *phb = tbl->sysdata;
 	u64 proto_tce;
 	u64 *tcep, *tces;
 	u64 rpn;
@@ -413,14 +372,19 @@ static int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
 	 * need that flush. We'll probably turn it_type into a bit mask
 	 * of flags if that becomes the case
 	 */
-	if (tbl->it_type & TCE_PCI_SWINV_CREATE)
-		pnv_tce_invalidate(tbl, tces, tcep - 1);
+	if (tbl->it_type & TCE_PCI_SWINV_CREATE) {
+		if (phb->type == PNV_PHB_IODA1)
+			pnv_pci_ioda1_tce_invalidate(tbl, tces, tcep - 1);
+		else
+			pnv_pci_ioda2_tce_invalidate(tbl, tces, tcep - 1);
+	}
 
 	return 0;
 }
 
 static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
 {
+	struct pnv_phb *phb = tbl->sysdata;
 	u64 *tcep, *tces;
 
 	tces = tcep = ((u64 *)tbl->it_base) + index - tbl->it_offset;
@@ -428,8 +392,12 @@ static void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
 	while (npages--)
 		*(tcep++) = 0;
 
-	if (tbl->it_type & TCE_PCI_SWINV_FREE)
-		pnv_tce_invalidate(tbl, tces, tcep - 1);
+	if (tbl->it_type & TCE_PCI_SWINV_FREE) {
+		if (phb->type == PNV_PHB_IODA1)
+			pnv_pci_ioda1_tce_invalidate(tbl, tces, tcep - 1);
+		else
+			pnv_pci_ioda2_tce_invalidate(tbl, tces, tcep - 1);
+	}
 }
 
 static unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index c6690b3..3cdc878 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -164,6 +164,8 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
-
-
+extern void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
+					 u64 *startp, u64 *endp);
+extern void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
+					 u64 *startp, u64 *endp);
 #endif /* __POWERNV_PCI_H */
-- 
1.7.5.4
^ permalink raw reply related	[flat|nested] 15+ messages in thread
- * Re: [PATCH 5/7] powerpc/powernv: TCE invalidation for PHB3
  2013-04-24  9:37 ` [PATCH 5/7] powerpc/powernv: TCE invalidation for PHB3 Gavin Shan
@ 2013-04-24 20:52   ` Benjamin Herrenschmidt
  2013-04-25  8:39     ` Gavin Shan
  0 siblings, 1 reply; 15+ messages in thread
From: Benjamin Herrenschmidt @ 2013-04-24 20:52 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linuxppc-dev
> diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
> index cbfe678..0db308e 100644
> --- a/arch/powerpc/include/asm/iommu.h
> +++ b/arch/powerpc/include/asm/iommu.h
> @@ -76,6 +76,7 @@ struct iommu_table {
>  	struct iommu_pool large_pool;
>  	struct iommu_pool pools[IOMMU_NR_POOLS];
>  	unsigned long *it_map;       /* A simple allocation bitmap for now */
> +	void *sysdata;
>  };
You should be able to avoid adding that field by using the container_of
trick to get to the PE and moving the iommu ops for ioda into pci-ioda.c
instead of sharing them with the non-ioda stuff.
Cheers,
Ben.
^ permalink raw reply	[flat|nested] 15+ messages in thread
- * Re: [PATCH 5/7] powerpc/powernv: TCE invalidation for PHB3
  2013-04-24 20:52   ` Benjamin Herrenschmidt
@ 2013-04-25  8:39     ` Gavin Shan
  0 siblings, 0 replies; 15+ messages in thread
From: Gavin Shan @ 2013-04-25  8:39 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Gavin Shan
On Thu, Apr 25, 2013 at 06:52:37AM +1000, Benjamin Herrenschmidt wrote:
>
>> diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
>> index cbfe678..0db308e 100644
>> --- a/arch/powerpc/include/asm/iommu.h
>> +++ b/arch/powerpc/include/asm/iommu.h
>> @@ -76,6 +76,7 @@ struct iommu_table {
>>  	struct iommu_pool large_pool;
>>  	struct iommu_pool pools[IOMMU_NR_POOLS];
>>  	unsigned long *it_map;       /* A simple allocation bitmap for now */
>> +	void *sysdata;
>>  };
>
>You should be able to avoid adding that field by using the container_of
>trick to get to the PE and moving the iommu ops for ioda into pci-ioda.c
>instead of sharing them with the non-ioda stuff.
>
Yep. I will introduce one function, pnv_pci_ioda_tce_invalidate(), in pci-ioda.c
and dispatch to the IODA1/IODA2 cases there. By the way, I will add one additional
field "struct pnv_phb *phb" to "struct pnv_ioda_pe".
Thanks,
Gavin
^ permalink raw reply	[flat|nested] 15+ messages in thread
 
 
- * [PATCH 6/7] powerpc/powernv: Build DMA space for PE on PHB3
  2013-04-24  9:37 [PATCH v3 0/7] powerpc/powernv: PHB3 Support Gavin Shan
                   ` (4 preceding siblings ...)
  2013-04-24  9:37 ` [PATCH 5/7] powerpc/powernv: TCE invalidation for PHB3 Gavin Shan
@ 2013-04-24  9:37 ` Gavin Shan
  2013-04-24  9:37 ` [PATCH 7/7] powerpc/powernv: Fix invalid IOMMU table Gavin Shan
  6 siblings, 0 replies; 15+ messages in thread
From: Gavin Shan @ 2013-04-24  9:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
The patch intends to build the 32-bit DMA space for individual PEs on
PHB3. The TVE# is determined by the combination of the PE# and fixed bits
from the DMA address, which are zero for the 32-bit DMA space.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |  102 +++++++++++++++++++++++++++--
 1 files changed, 96 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9f4d323..6bc4648 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -589,9 +589,8 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 		 */
 		tbl->it_busno = 0;
 		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
-		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
-		if (phb->type == PNV_PHB_IODA1)
-			tbl->it_type |= TCE_PCI_SWINV_PAIR;
+		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE |
+			       TCE_PCI_SWINV_PAIR;
 	}
 	iommu_init_table(tbl, phb->hose->node);
 
@@ -609,6 +608,84 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 		__free_pages(tce_mem, get_order(TCE32_TABLE_SIZE * segs));
 }
 
+static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+				       struct pnv_ioda_pe *pe)
+{
+	struct page *tce_mem = NULL;
+	void *addr;
+	const __be64 *swinvp;
+	struct iommu_table *tbl;
+	unsigned int tce_table_size, end;
+	int64_t rc;
+
+	/* We shouldn't already have a 32-bit DMA associated */
+	if (WARN_ON(pe->tce32_seg >= 0))
+		return;
+
+	/* The PE will reserve all possible 32-bits space */
+	pe->tce32_seg = 0;
+	end = (1 << ilog2(phb->ioda.m32_pci_base));
+	tce_table_size = (end / 0x1000) * 8;
+	pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
+		end);
+
+	/* Allocate TCE table */
+	tce_mem = alloc_pages_node(phb->hose->node, GFP_KERNEL,
+				   get_order(tce_table_size));
+	if (!tce_mem) {
+		pe_err(pe, "Failed to allocate a 32-bit TCE memory\n");
+		goto fail;
+	}
+	addr = page_address(tce_mem);
+	memset(addr, 0, tce_table_size);
+
+	/*
+	 * Map TCE table through TVT. The TVE index is the PE number
+	 * shifted by 1 bit for 32-bits DMA space.
+	 */
+	rc = opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number,
+					pe->pe_number << 1, 1, __pa(addr),
+					tce_table_size, 0x1000);
+	if (rc) {
+		pe_err(pe, "Failed to configure 32-bit TCE table,"
+		       " err %ld\n", rc);
+		goto fail;
+	}
+
+	/* Setup linux iommu table */
+	tbl = &pe->tce32_table;
+	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0);
+
+	/* Hook the IOMMU table to PHB */
+	tbl->sysdata = phb;
+
+	/* OPAL variant of PHB3 invalidated TCEs */
+	swinvp = of_get_property(phb->hose->dn, "ibm,opal-tce-kill", NULL);
+	if (swinvp) {
+		/* We need a couple more fields -- an address and a data
+		 * to or.  Since the bus is only printed out on table free
+		 * errors, and on the first pass the data will be a relative
+		 * bus number, print that out instead.
+		 */
+		tbl->it_busno = 0;
+		tbl->it_index = (unsigned long)ioremap(be64_to_cpup(swinvp), 8);
+		tbl->it_type = TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE;
+	}
+	iommu_init_table(tbl, phb->hose->node);
+
+	if (pe->pdev)
+		set_iommu_table_base(&pe->pdev->dev, tbl);
+	else
+		pnv_ioda_setup_bus_dma(pe, pe->pbus);
+
+	return;
+fail:
+	if (pe->tce32_seg >= 0)
+		pe->tce32_seg = -1;
+	if (tce_mem)
+		__free_pages(tce_mem, get_order(tce_table_size));
+}
+
 static void pnv_ioda_setup_dma(struct pnv_phb *phb)
 {
 	struct pci_controller *hose = phb->hose;
@@ -651,9 +728,22 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
 			if (segs > remaining)
 				segs = remaining;
 		}
-		pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
-			pe->dma_weight, segs);
-		pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
+
+		/*
+		 * For IODA2 compliant PHB3, we needn't care about the weight.
+		 * The all available 32-bits DMA space will be assigned to
+		 * the specific PE.
+		 */
+		if (phb->type == PNV_PHB_IODA1) {
+			pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
+				pe->dma_weight, segs);
+			pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
+		} else {
+			pe_info(pe, "Assign DMA32 space\n");
+			segs = 0;
+			pnv_pci_ioda2_setup_dma_pe(phb, pe);
+		}
+
 		remaining -= segs;
 		base += segs;
 	}
-- 
1.7.5.4
^ permalink raw reply related	[flat|nested] 15+ messages in thread
- * [PATCH 7/7] powerpc/powernv: Fix invalid IOMMU table
  2013-04-24  9:37 [PATCH v3 0/7] powerpc/powernv: PHB3 Support Gavin Shan
                   ` (5 preceding siblings ...)
  2013-04-24  9:37 ` [PATCH 6/7] powerpc/powernv: Build DMA space for PE on PHB3 Gavin Shan
@ 2013-04-24  9:37 ` Gavin Shan
  6 siblings, 0 replies; 15+ messages in thread
From: Gavin Shan @ 2013-04-24  9:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
Ben found the root cause. Commit 37f02195bee9c25ce44e25204f40b7961a6d7c9d
("powerpc/pci: fix PCI-e devices rescan issue on powerpc platform")
overwrites the IOMMU table of a PCI device while enabling that device.
The patch intends to fix up the IOMMU table after that point.
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/powernv/pci-ioda.c |   33 ++++++++++------------------
 1 files changed, 12 insertions(+), 21 deletions(-)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 6bc4648..c41696f 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -432,20 +432,21 @@ static void pnv_pci_ioda_setup_PEs(void)
 	}
 }
 
-static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *dev)
+static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev)
 {
-	/* We delay DMA setup after we have assigned all PE# */
-}
+	struct pci_dn *pdn = pnv_ioda_get_pdn(pdev);
+	struct pnv_ioda_pe *pe;
 
-static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
-{
-	struct pci_dev *dev;
+	/*
+	 * The function can be called while the PE#
+	 * hasn't been assigned. Do nothing for the
+	 * case.
+	 */
+	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
+		return;
 
-	list_for_each_entry(dev, &bus->devices, bus_list) {
-		set_iommu_table_base(&dev->dev, &pe->tce32_table);
-		if (dev->subordinate)
-			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
-	}
+	pe = &phb->ioda.pe_array[pdn->pe_number];
+	set_iommu_table_base(&pdev->dev, &pe->tce32_table);
 }
 
 void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
@@ -594,11 +595,6 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
 	}
 	iommu_init_table(tbl, phb->hose->node);
 
-	if (pe->pdev)
-		set_iommu_table_base(&pe->pdev->dev, tbl);
-	else
-		pnv_ioda_setup_bus_dma(pe, pe->pbus);
-
 	return;
  fail:
 	/* XXX Failure: Try to fallback to 64-bit only ? */
@@ -673,11 +669,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
 	}
 	iommu_init_table(tbl, phb->hose->node);
 
-	if (pe->pdev)
-		set_iommu_table_base(&pe->pdev->dev, tbl);
-	else
-		pnv_ioda_setup_bus_dma(pe, pe->pbus);
-
 	return;
 fail:
 	if (pe->tce32_seg >= 0)
-- 
1.7.5.4
^ permalink raw reply related	[flat|nested] 15+ messages in thread