- * [PATCH v2 01/27] of/pci: Provide support for parsing PCI DT ranges property
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 02/27] of/pci: Add of_pci_get_devfn() function Thomas Petazzoni
                   ` (25 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
From: Andrew Murray <andrew.murray@arm.com>
DT bindings for PCI host bridges often use the ranges property to describe
memory and IO ranges - this binding tends to be the same across architectures
yet several parsing implementations exist, e.g. arch/mips/pci/pci.c,
arch/powerpc/kernel/pci-common.c, arch/sparc/kernel/pci.c and
arch/microblaze/pci/pci-common.c (clone of PPC). Some of these duplicate
functionality provided by drivers/of/address.c.
This patch provides a common iterator-based parser for the ranges property, it
is hoped this will reduce DT representation differences between architectures
and that architectures will migrate in part to this new parser.
It is also hoped (and the motivation for the patch) that this patch will
reduce duplication of code when writing host bridge drivers that are supported
by multiple architectures.
This patch provides struct resources from a device tree node, e.g.:
	const __be32 *last = NULL;
	struct resource res;
	while ((last = of_pci_process_ranges(np, &res, last))) {
		//do something with res
	}
Platforms with quirks can then do what they like with the resource or migrate
common quirk handling to the parser. In an ideal world drivers can just request
the obtained resources and pass them on (e.g. pci_add_resource_offset).
Signed-off-by: Andrew Murray <Andrew.Murray@arm.com>
Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/of/address.c       |   63 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of_address.h |    9 +++++++
 2 files changed, 72 insertions(+)
diff --git a/drivers/of/address.c b/drivers/of/address.c
index 0125524..d659527 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -13,6 +13,7 @@
 #define OF_CHECK_COUNTS(na, ns)	(OF_CHECK_ADDR_COUNT(na) && (ns) > 0)
 
 static struct of_bus *of_match_bus(struct device_node *np);
+static struct of_bus *of_find_bus(const char *name);
 static int __of_address_to_resource(struct device_node *dev,
 		const __be32 *addrp, u64 size, unsigned int flags,
 		const char *name, struct resource *r);
@@ -227,6 +228,57 @@ int of_pci_address_to_resource(struct device_node *dev, int bar,
 	return __of_address_to_resource(dev, addrp, size, flags, NULL, r);
 }
 EXPORT_SYMBOL_GPL(of_pci_address_to_resource);
+
+const __be32 *of_pci_process_ranges(struct device_node *node,
+				    struct resource *res, const __be32 *from)
+{
+	const __be32 *start, *end;
+	int na, ns, np, pna;
+	int rlen;
+	struct of_bus *bus;
+
+	WARN_ON(!res);
+
+	bus = of_find_bus("pci");
+	bus->count_cells(node, &na, &ns);
+	if (!OF_CHECK_COUNTS(na, ns)) {
+		pr_err("Bad cell count for %s\n", node->full_name);
+		return NULL;
+	}
+
+	pna = of_n_addr_cells(node);
+	np = pna + na + ns;
+
+	start = of_get_property(node, "ranges", &rlen);
+	if (start == NULL)
+		return NULL;
+
+	end = start + rlen / sizeof(__be32);
+
+	if (!from)
+		from = start;
+
+	while (from + np <= end) {
+		u64 cpu_addr, size;
+
+		cpu_addr = of_translate_address(node, from + na);
+		size = of_read_number(from + na + pna, ns);
+		res->flags = bus->get_flags(from);
+		from += np;
+
+		if (cpu_addr == OF_BAD_ADDR || size == 0)
+			continue;
+
+		res->name = node->full_name;
+		res->start = cpu_addr;
+		res->end = res->start + size - 1;
+		res->parent = res->child = res->sibling = NULL;
+		return from;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(of_pci_process_ranges);
 #endif /* CONFIG_PCI */
 
 /*
@@ -337,6 +389,17 @@ static struct of_bus *of_match_bus(struct device_node *np)
 	return NULL;
 }
 
+static struct of_bus *of_find_bus(const char *name)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(of_busses); i++)
+		if (strcmp(name, of_busses[i].name) == 0)
+			return &of_busses[i];
+
+	return NULL;
+}
+
 static int of_translate_one(struct device_node *parent, struct of_bus *bus,
 			    struct of_bus *pbus, __be32 *addr,
 			    int na, int ns, int pna, const char *rprop)
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 0506eb5..751e889 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -27,6 +27,8 @@ static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; }
 #define pci_address_to_pio pci_address_to_pio
 #endif
 
+const __be32 *of_pci_process_ranges(struct device_node *node,
+				    struct resource *res, const __be32 *from);
 #else /* CONFIG_OF_ADDRESS */
 #ifndef of_address_to_resource
 static inline int of_address_to_resource(struct device_node *dev, int index,
@@ -53,6 +55,13 @@ static inline const __be32 *of_get_address(struct device_node *dev, int index,
 {
 	return NULL;
 }
+
+static inline const __be32 *of_pci_process_ranges(struct device_node *node,
+						  struct resource *res,
+						  const __be32 *from)
+{
+	return NULL;
+}
 #endif /* CONFIG_OF_ADDRESS */
 
 
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 02/27] of/pci: Add of_pci_get_devfn() function
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 01/27] of/pci: Provide support for parsing PCI DT ranges property Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 22:00   ` Stephen Warren
  2013-01-28 18:56 ` [PATCH v2 03/27] of/pci: Add of_pci_parse_bus_range() function Thomas Petazzoni
                   ` (24 subsequent siblings)
  26 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
From: Thierry Reding <thierry.reding@avionic-design.de>
This function can be used to parse the device and function number from a
standard 5-cell PCI resource. PCI_SLOT() and PCI_FUNC() can be used on
the returned value to obtain the device and function numbers respectively.
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/of/of_pci.c    |   32 ++++++++++++++++++++++++++++----
 include/linux/of_pci.h |    1 +
 2 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index 13e37e2..0dd52df 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -7,12 +7,13 @@
 static inline int __of_pci_pci_compare(struct device_node *node,
 				       unsigned int devfn)
 {
-	unsigned int size;
-	const __be32 *reg = of_get_property(node, "reg", &size);
+	int err;
 
-	if (!reg || size < 5 * sizeof(__be32))
+	err = of_pci_get_devfn(node);
+	if (err < 0)
 		return 0;
-	return ((be32_to_cpup(&reg[0]) >> 8) & 0xff) == devfn;
+
+	return devfn == err;
 }
 
 struct device_node *of_pci_find_child_device(struct device_node *parent,
@@ -40,3 +41,26 @@ struct device_node *of_pci_find_child_device(struct device_node *parent,
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(of_pci_find_child_device);
+
+/**
+ * of_pci_get_devfn() - Get device and function numbers for a device node
+ * @np: device node
+ *
+ * Parses a standard 5-cell PCI resource and returns an 8-bit value that can
+ * be passed to the PCI_SLOT() and PCI_FUNC() macros to extract the device
+ * and function numbers respectively. On error a negative error code is
+ * returned.
+ */
+int of_pci_get_devfn(struct device_node *np)
+{
+	unsigned int size;
+	const __be32 *reg;
+
+	reg = of_get_property(np, "reg", &size);
+
+	if (!reg || size < 5 * sizeof(__be32))
+		return -EINVAL;
+
+	return (be32_to_cpup(reg) >> 8) & 0xff;
+}
+EXPORT_SYMBOL_GPL(of_pci_get_devfn);
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index bb115de..91ec484 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -10,5 +10,6 @@ int of_irq_map_pci(const struct pci_dev *pdev, struct of_irq *out_irq);
 struct device_node;
 struct device_node *of_pci_find_child_device(struct device_node *parent,
 					     unsigned int devfn);
+int of_pci_get_devfn(struct device_node *np);
 
 #endif
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 02/27] of/pci: Add of_pci_get_devfn() function
  2013-01-28 18:56 ` [PATCH v2 02/27] of/pci: Add of_pci_get_devfn() function Thomas Petazzoni
@ 2013-01-28 22:00   ` Stephen Warren
  2013-01-28 22:16     ` Thierry Reding
  0 siblings, 1 reply; 216+ messages in thread
From: Stephen Warren @ 2013-01-28 22:00 UTC (permalink / raw)
  To: linux-arm-kernel
On 01/28/2013 11:56 AM, Thomas Petazzoni wrote:
> From: Thierry Reding <thierry.reding@avionic-design.de>
> 
> This function can be used to parse the device and function number from a
> standard 5-cell PCI resource. PCI_SLOT() and PCI_FUNC() can be used on
> the returned value obtain the device and function numbers respectively.
> diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
>  static inline int __of_pci_pci_compare(struct device_node *node,
>  				       unsigned int devfn)
>  {
> -	unsigned int size;
> -	const __be32 *reg = of_get_property(node, "reg", &size);
> +	int err;
I think I commented when Thierry posted this, that calling that "err"
seems a little odd. Thierry replied:
Maybe renaming the devfn parameter to data and using devfn for the local
variable would be more obvious.
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 02/27] of/pci: Add of_pci_get_devfn() function
  2013-01-28 22:00   ` Stephen Warren
@ 2013-01-28 22:16     ` Thierry Reding
  2013-01-29 10:04       ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Thierry Reding @ 2013-01-28 22:16 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 03:00:10PM -0700, Stephen Warren wrote:
> On 01/28/2013 11:56 AM, Thomas Petazzoni wrote:
> > From: Thierry Reding <thierry.reding@avionic-design.de>
> > 
> > This function can be used to parse the device and function number from a
> > standard 5-cell PCI resource. PCI_SLOT() and PCI_FUNC() can be used on
> > the returned value obtain the device and function numbers respectively.
> 
> > diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
> 
> >  static inline int __of_pci_pci_compare(struct device_node *node,
> >  				       unsigned int devfn)
> >  {
> > -	unsigned int size;
> > -	const __be32 *reg = of_get_property(node, "reg", &size);
> > +	int err;
> 
> I think I commented when Thierry posted this, that calling that "err"
> seems a little odd. Thierry replied:
> 
> Maybe renaming the devfn parameter to data and using devfn for the local
> variable would be more obvious.
That's already fixed up in my series. I was going to wait until I was
done with the MSI rework but maybe posting an intermediate version is in
order to share the latest state.
Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 836 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20130128/02aacdbf/attachment.sig>
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 02/27] of/pci: Add of_pci_get_devfn() function
  2013-01-28 22:16     ` Thierry Reding
@ 2013-01-29 10:04       ` Thomas Petazzoni
  0 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 10:04 UTC (permalink / raw)
  To: linux-arm-kernel
Thierry, Stephen,
On Mon, 28 Jan 2013 23:16:04 +0100, Thierry Reding wrote:
> That's already fixed up in my series. I was going to wait until I was
> done with the MSI rework but maybe posting an intermediate version is in
> order to share the latest state.
Thanks! Those first four patches are definitely shared between our
series.
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
- * [PATCH v2 03/27] of/pci: Add of_pci_parse_bus_range() function
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 01/27] of/pci: Provide support for parsing PCI DT ranges property Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 02/27] of/pci: Add of_pci_get_devfn() function Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 04/27] ARM: pci: Allow passing per-controller private data Thomas Petazzoni
                   ` (23 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
From: Thierry Reding <thierry.reding@avionic-design.de>
This function can be used to parse a bus-range property as specified by
device nodes representing PCI bridges.
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/of/of_pci.c    |   25 +++++++++++++++++++++++++
 include/linux/of_pci.h |    1 +
 2 files changed, 26 insertions(+)
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index 0dd52df..3ea0e84 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -64,3 +64,28 @@ int of_pci_get_devfn(struct device_node *np)
 	return (be32_to_cpup(reg) >> 8) & 0xff;
 }
 EXPORT_SYMBOL_GPL(of_pci_get_devfn);
+
+/**
+ * of_pci_parse_bus_range() - parse the bus-range property of a PCI device
+ * @node: device node
+ * @res: address to a struct resource to return the bus-range
+ *
+ * Returns 0 on success or a negative error-code on failure.
+ */
+int of_pci_parse_bus_range(struct device_node *node, struct resource *res)
+{
+	const __be32 *values;
+	int len;
+
+	values = of_get_property(node, "bus-range", &len);
+	if (!values || len < sizeof(*values) * 2)
+		return -EINVAL;
+
+	res->name = node->name;
+	res->start = be32_to_cpup(values++);
+	res->end = be32_to_cpup(values);
+	res->flags = IORESOURCE_BUS;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_pci_parse_bus_range);
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 91ec484..7a04826 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -11,5 +11,6 @@ struct device_node;
 struct device_node *of_pci_find_child_device(struct device_node *parent,
 					     unsigned int devfn);
 int of_pci_get_devfn(struct device_node *np);
+int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
 
 #endif
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 04/27] ARM: pci: Allow passing per-controller private data
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (2 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 03/27] of/pci: Add of_pci_parse_bus_range() function Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 05/27] arm: pci: add a align_resource hook Thomas Petazzoni
                   ` (22 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
From: Thierry Reding <thierry.reding@avionic-design.de>
In order to allow drivers to specify private data for each controller,
this commit adds a private_data field to the struct hw_pci. This field
is an array of nr_controllers pointers that will be used to initialize
the private_data field of the corresponding controller's pci_sys_data
structure.
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
---
 arch/arm/include/asm/mach/pci.h |    1 +
 arch/arm/kernel/bios32.c        |    3 +++
 2 files changed, 4 insertions(+)
diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h
index db9fedb..5cf2e97 100644
--- a/arch/arm/include/asm/mach/pci.h
+++ b/arch/arm/include/asm/mach/pci.h
@@ -23,6 +23,7 @@ struct hw_pci {
 #endif
 	struct pci_ops	*ops;
 	int		nr_controllers;
+	void		**private_data;
 	int		(*setup)(int nr, struct pci_sys_data *);
 	struct pci_bus *(*scan)(int nr, struct pci_sys_data *);
 	void		(*preinit)(void);
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 379cf32..5401645 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -464,6 +464,9 @@ static void __init pcibios_init_hw(struct hw_pci *hw, struct list_head *head)
 		sys->map_irq = hw->map_irq;
 		INIT_LIST_HEAD(&sys->resources);
 
+		if (hw->private_data)
+			sys->private_data = hw->private_data[nr];
+
 		ret = hw->setup(nr, sys);
 
 		if (ret > 0) {
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (3 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 04/27] ARM: pci: Allow passing per-controller private data Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-29 15:12   ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 06/27] lib: devres: don't enclose pcim_*() functions in CONFIG_HAS_IOPORT Thomas Petazzoni
                   ` (21 subsequent siblings)
  26 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The PCI specification says that an I/O region must be aligned on a 4
KB boundary, and a memory region aligned on a 1 MB boundary.
However, the Marvell PCIe interfaces rely on address decoding windows
(which allow associating a range of physical addresses with a given
device). For PCIe memory windows, those windows are defined with a 1
MB granularity (which matches the PCI specs), but PCIe I/O windows can
only be defined with a 64 KB granularity, so they have to be 64 KB
aligned. We therefore need to tell the PCI core about this special
alignment requirement.
The PCI core already calls pcibios_align_resource() in the ARM PCI
core, specifically for such purposes. So this patch extends the ARM
PCI core so that it calls a ->align_resource() hook registered by the
PCI driver, exactly like the existing ->map_irq() and ->swizzle()
hooks.
A particular PCI driver can register an align_resource() hook, and do
its own specific alignment, depending on the specific constraints of
the underlying hardware.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Russell King <linux@arm.linux.org.uk>
---
 arch/arm/include/asm/mach/pci.h |   11 +++++++++++
 arch/arm/kernel/bios32.c        |    6 ++++++
 2 files changed, 17 insertions(+)
diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h
index 5cf2e97..7d2c3c8 100644
--- a/arch/arm/include/asm/mach/pci.h
+++ b/arch/arm/include/asm/mach/pci.h
@@ -30,6 +30,11 @@ struct hw_pci {
 	void		(*postinit)(void);
 	u8		(*swizzle)(struct pci_dev *dev, u8 *pin);
 	int		(*map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
+	resource_size_t (*align_resource)(struct pci_dev *dev,
+					  const struct resource *res,
+					  resource_size_t start,
+					  resource_size_t size,
+					  resource_size_t align);
 };
 
 /*
@@ -51,6 +56,12 @@ struct pci_sys_data {
 	u8		(*swizzle)(struct pci_dev *, u8 *);
 					/* IRQ mapping				*/
 	int		(*map_irq)(const struct pci_dev *, u8, u8);
+					/* Resource alignement requirements	*/
+	resource_size_t (*align_resource)(struct pci_dev *dev,
+					  const struct resource *res,
+					  resource_size_t start,
+					  resource_size_t size,
+					  resource_size_t align);
 	void		*private_data;	/* platform controller private data	*/
 };
 
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 5401645..be2e6c9 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -462,6 +462,7 @@ static void __init pcibios_init_hw(struct hw_pci *hw, struct list_head *head)
 		sys->busnr   = busnr;
 		sys->swizzle = hw->swizzle;
 		sys->map_irq = hw->map_irq;
+		sys->align_resource = hw->align_resource;
 		INIT_LIST_HEAD(&sys->resources);
 
 		if (hw->private_data)
@@ -574,6 +575,8 @@ char * __init pcibios_setup(char *str)
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
+	struct pci_dev *dev = data;
+	struct pci_sys_data *sys = dev->sysdata;
 	resource_size_t start = res->start;
 
 	if (res->flags & IORESOURCE_IO && start & 0x300)
@@ -581,6 +584,9 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 
 	start = (start + align - 1) & ~(align - 1);
 
+	if (sys->align_resource)
+		return sys->align_resource(dev, res, start, size, align);
+
 	return start;
 }
 
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-28 18:56 ` [PATCH v2 05/27] arm: pci: add a align_resource hook Thomas Petazzoni
@ 2013-01-29 15:12   ` Thomas Petazzoni
  2013-01-29 15:15     ` Russell King - ARM Linux
  2013-01-29 15:58     ` Russell King - ARM Linux
  0 siblings, 2 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 15:12 UTC (permalink / raw)
  To: linux-arm-kernel
Russell,
As the arch/arm/kernel/ maintainer, what is your position regarding the
below patch?
Thanks for your review,
Thomas
On Mon, 28 Jan 2013 19:56:14 +0100, Thomas Petazzoni wrote:
> The PCI specifications says that an I/O region must be aligned on a 4
> KB boundary, and a memory region aligned on a 1 MB boundary.
> 
> However, the Marvell PCIe interfaces rely on address decoding windows
> (which allow to associate a range of physical addresses with a given
> device). For PCIe memory windows, those windows are defined with a 1
> MB granularity (which matches the PCI specs), but PCIe I/O windows can
> only be defined with a 64 KB granularity, so they have to be 64 KB
> aligned. We therefore need to tell the PCI core about this special
> alignement requirement.
> 
> The PCI core already calls pcibios_align_resource() in the ARM PCI
> core, specifically for such purposes. So this patch extends the ARM
> PCI core so that it calls a ->align_resource() hook registered by the
> PCI driver, exactly like the existing ->map_irq() and ->swizzle()
> hooks.
> 
> A particular PCI driver can register a align_resource() hook, and do
> its own specific alignement, depending on the specific constraints of
> the underlying hardware.
> 
> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> Cc: Russell King <linux@arm.linux.org.uk>
> ---
>  arch/arm/include/asm/mach/pci.h |   11 +++++++++++
>  arch/arm/kernel/bios32.c        |    6 ++++++
>  2 files changed, 17 insertions(+)
> 
> diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h
> index 5cf2e97..7d2c3c8 100644
> --- a/arch/arm/include/asm/mach/pci.h
> +++ b/arch/arm/include/asm/mach/pci.h
> @@ -30,6 +30,11 @@ struct hw_pci {
>  	void		(*postinit)(void);
>  	u8		(*swizzle)(struct pci_dev *dev, u8 *pin);
>  	int		(*map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
> +	resource_size_t (*align_resource)(struct pci_dev *dev,
> +					  const struct resource *res,
> +					  resource_size_t start,
> +					  resource_size_t size,
> +					  resource_size_t align);
>  };
>  
>  /*
> @@ -51,6 +56,12 @@ struct pci_sys_data {
>  	u8		(*swizzle)(struct pci_dev *, u8 *);
>  					/* IRQ mapping				*/
>  	int		(*map_irq)(const struct pci_dev *, u8, u8);
> +					/* Resource alignement requirements	*/
> +	resource_size_t (*align_resource)(struct pci_dev *dev,
> +					  const struct resource *res,
> +					  resource_size_t start,
> +					  resource_size_t size,
> +					  resource_size_t align);
>  	void		*private_data;	/* platform controller private data	*/
>  };
>  
> diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
> index 5401645..be2e6c9 100644
> --- a/arch/arm/kernel/bios32.c
> +++ b/arch/arm/kernel/bios32.c
> @@ -462,6 +462,7 @@ static void __init pcibios_init_hw(struct hw_pci *hw, struct list_head *head)
>  		sys->busnr   = busnr;
>  		sys->swizzle = hw->swizzle;
>  		sys->map_irq = hw->map_irq;
> +		sys->align_resource = hw->align_resource;
>  		INIT_LIST_HEAD(&sys->resources);
>  
>  		if (hw->private_data)
> @@ -574,6 +575,8 @@ char * __init pcibios_setup(char *str)
>  resource_size_t pcibios_align_resource(void *data, const struct resource *res,
>  				resource_size_t size, resource_size_t align)
>  {
> +	struct pci_dev *dev = data;
> +	struct pci_sys_data *sys = dev->sysdata;
>  	resource_size_t start = res->start;
>  
>  	if (res->flags & IORESOURCE_IO && start & 0x300)
> @@ -581,6 +584,9 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
>  
>  	start = (start + align - 1) & ~(align - 1);
>  
> +	if (sys->align_resource)
> +		return sys->align_resource(dev, res, start, size, align);
> +
>  	return start;
>  }
>  
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 15:12   ` Thomas Petazzoni
@ 2013-01-29 15:15     ` Russell King - ARM Linux
  2013-01-29 15:23       ` Thomas Petazzoni
  2013-01-29 15:58     ` Russell King - ARM Linux
  1 sibling, 1 reply; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-29 15:15 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 04:12:11PM +0100, Thomas Petazzoni wrote:
> Russell,
> 
> As the arch/arm/kernel/ maintainer, what is your position regarding the
> below patch?
Given the description, I'd feel much happier with this if we specified
the alignment numerically rather than allowing "some random code" to do
something with the passed values.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 15:15     ` Russell King - ARM Linux
@ 2013-01-29 15:23       ` Thomas Petazzoni
  2013-01-29 15:25         ` Russell King - ARM Linux
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 15:23 UTC (permalink / raw)
  To: linux-arm-kernel
Russell,
Thanks for your quick feedback!
On Tue, 29 Jan 2013 15:15:01 +0000, Russell King - ARM Linux wrote:
> Given the description, I'd feel much happier with this if we specified
> the alignment numerically rather than allowing "some random code" to do
> something with the passed values.
So, you'd prefer to have two new members added in the hw_pci structure
to give the alignment requirements for I/O regions and memory regions?
Something like:
struct hw_pci {
	[...]
	unsigned long io_align;
	unsigned long mem_align;
};
If that's fine with you, I'll go ahead and change the implementation in
this direction. As long as I can express my special 64 KB alignment
requirement for I/O regions, I'm just fine :-)
Thanks again,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 15:23       ` Thomas Petazzoni
@ 2013-01-29 15:25         ` Russell King - ARM Linux
  2013-01-29 15:28           ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-29 15:25 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 04:23:37PM +0100, Thomas Petazzoni wrote:
> Russell,
> 
> Thanks for your quick feedback!
> 
> On Tue, 29 Jan 2013 15:15:01 +0000, Russell King - ARM Linux wrote:
> 
> > Given the description, I'd feel much happier with this if we specified
> > the alignment numerically rather than allowing "some random code" to do
> > something with the passed values.
> 
> So, you'd prefer to have two new members added in the hw_pci structure
> to give the alignment requirements for I/O regions and memory regions?
Yep, otherwise we'll have yet more code to review rather than one
algorithm with a set of numbers...
I work on the principle that if something can be expressed numerically,
that's always better than expressing it with code.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 15:25         ` Russell King - ARM Linux
@ 2013-01-29 15:28           ` Thomas Petazzoni
  0 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 15:28 UTC (permalink / raw)
  To: linux-arm-kernel
Russell,
On Tue, 29 Jan 2013 15:25:18 +0000, Russell King - ARM Linux wrote:
> Yep, otherwise we'll have yet more code to review rather than one
> algorithm with a set of numbers...
> 
> I work on the principle that if something can be expressed numerically,
> that's always better than expressing it with code.
Having a hook allows for more flexibility (for example having special
alignment requirements depending on the device or something like that),
but I don't need this flexibility for the specific case I'm interested
in. So as you suggest, let's not over-engineer this, and use numerical
values. I'll rework my patch according to this suggestion.
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 15:12   ` Thomas Petazzoni
  2013-01-29 15:15     ` Russell King - ARM Linux
@ 2013-01-29 15:58     ` Russell King - ARM Linux
  2013-01-29 16:20       ` Thomas Petazzoni
  1 sibling, 1 reply; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-29 15:58 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 04:12:11PM +0100, Thomas Petazzoni wrote:
> On Mon, 28 Jan 2013 19:56:14 +0100, Thomas Petazzoni wrote:
> > The PCI specifications says that an I/O region must be aligned on a 4
> > KB boundary, and a memory region aligned on a 1 MB boundary.
BTW, this, as a general statement, is wrong - though it really depends
what you mean by "region".
Remember that BARs can be set wherever provided that they satisfy
their _individual_ alignment requirements.  So, an IO bar which
occupies 16 bytes must be set to a 16-byte boundary.
Now, there's an additional complication there which occurs if you have
ISA devices sharing the PCI IO space: ISA devices used to only decode
10 bits of IO space, which means that their registers repeat throughout
the IO space.
Therefore, it is generally accepted that within any 1K block, only the
first 256 locations are usable.
Moreover, some PCI cards have taken advantage of this, particularly VGA
cards.  For example, S3 VGA cards put different registers on 1K
multiples of the standard PC VGA IO addresses...
Also, another reason why I suspect your statement is wrong if I were
to interpret "region" as "BAR" is that consider a bunch of PCI peripherals
behind a PCI bridge.  The total number of IO BARs on the peripherals
is 16.
If you allocate each of those IO BARs to be 4K aligned, then you
consume all 64K of IO space behind one bridge, which leaves no space
for any other IO peripherals elsewhere in the bus structure.
Last reason I think that interpretation is wrong is, on this PC, I see:
        Region 4: I/O ports at 1800 [size=8]
        Region 0: I/O ports at 1830 [size=8]
        Region 2: I/O ports at 1840 [size=32]
        Region 4: I/O ports at 1860 [size=32]
        Region 4: I/O ports at 1880 [size=32]
        Region 4: I/O ports at 18a0 [size=32]
        Region 4: I/O ports at 18c0 [size=32]
        Region 4: I/O ports at 18e0 [size=32]
        Region 4: I/O ports at 1c00 [size=32]
        Region 0: I/O ports at 1c48 [size=8]
        Region 1: I/O ports at 183c [size=4]
        Region 2: I/O ports at 1c40 [size=8]
        Region 3: I/O ports at 1838 [size=4]
        Region 4: I/O ports at 1c20 [size=32]
        Region 4: I/O ports at 1c60 [size=32]
which doesn't follow - and I can pull out other examples on other
x86 platforms where IO BARs aren't aligned to 4K...
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 15:58     ` Russell King - ARM Linux
@ 2013-01-29 16:20       ` Thomas Petazzoni
  2013-01-29 16:45         ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 16:20 UTC (permalink / raw)
  To: linux-arm-kernel
Russell,
On Tue, 29 Jan 2013 15:58:20 +0000, Russell King - ARM Linux wrote:
> On Tue, Jan 29, 2013 at 04:12:11PM +0100, Thomas Petazzoni wrote:
> > On Mon, 28 Jan 2013 19:56:14 +0100, Thomas Petazzoni wrote:
> > > The PCI specifications says that an I/O region must be aligned on a 4
> > > KB boundary, and a memory region aligned on a 1 MB boundary.
> 
> BTW, this, as a general statement, is wrong - though it really depends
> what you mean by "region".
Yes, sorry, my statement does not correctly reflect the reality. My
knowledge of the PCI terminology is still quite fuzzy (as you found
out). What I am referring to is that the PCI standard requires the I/O
base register of a PCI-to-PCI bridge to contain a 4 KB aligned address.
From the PCI-to-PCI Bridge Architecture Specification, Revision 1.1,
section 3.2.5.6. I/O Base Register and I/O Limit Register:
"""
   If a bridge implements an I/O address range, the upper 4 bits of
   both the I/O Base and I/O Limit registers are writable and
   correspond to address bits AD[15::12]. For the purpose of address
   decoding, the bridge assumes that the lower 12 address bits,
   AD[11::00], of the I/O base address (not implemented in the I/O Base
   register) are zero. Similarly, the bridge assumes that the lower 12
   address bits, AD[11::00], of the I/O limit address (not implemented
   in the I/O Limit register) are FFFh. Thus, the bottom of the defined
   I/O address range will be aligned to a 4 KB boundary and the top of
   the defined I/O address range will be one less than a 4 KB boundary.
"""
And the Linux PCI resource allocation code complies with this, so that
if I have two PCI-to-PCI bridges (each having downstream a device with
an I/O BAR), then the first PCI-to-PCI bridge gets its I/O base address
register set to ADDR + 0x0, and the second bridge gets its I/O base
address set to ADDR + 0x1000. And this doesn't play well with the
requirements of Marvell address decoding windows for PCIe I/O regions,
which must be 64 KB aligned.
So I guess I should simply rewrite the commit log to make it clear that
I'm referring to the I/O base address register of PCI-to-PCI bridges.
Would this be more correct? In that case, maybe in fact I really need a
hook so that this alignment requirement is only applied to the
resources allocated to bridges, and not to their downstream devices?
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 16:20       ` Thomas Petazzoni
@ 2013-01-29 16:45         ` Arnd Bergmann
  2013-01-29 17:09           ` Thomas Petazzoni
  2013-01-30  4:56           ` Jason Gunthorpe
  0 siblings, 2 replies; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-29 16:45 UTC (permalink / raw)
  To: linux-arm-kernel
On Tuesday 29 January 2013, Thomas Petazzoni wrote:
> And the Linux PCI resource allocation code complies with this, so that
> if I have two PCI-to-PCI bridges (each having downstream a device with
> an I/O BAR), then the first PCI-to-PCI bridge gets its I/O base address
> register set to ADDR + 0x0, and the second bridge gets its I/O base
> address set to ADDR + 0x1000. And this doesn't play well with the
> requirements of Marvell address decoding windows for PCIe I/O regions,
> which must be 64 KB aligned.
But we normally only assign a 64 KB I/O window to each PCI host bridge.
Requiring PCI bridges to be spaced 64 KB apart would mean that we cannot
actually support bridges at all.
Is this just about your "virtual" bridges? If each one has its
own 64 KB I/O range and its own configuration space, that sounds
a lot like you should make them appear as individual domains instead.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 16:45         ` Arnd Bergmann
@ 2013-01-29 17:09           ` Thomas Petazzoni
  2013-01-29 20:15             ` Arnd Bergmann
  2013-01-30  4:56           ` Jason Gunthorpe
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 17:09 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Arnd Bergmann,
On Tue, 29 Jan 2013 16:45:07 +0000, Arnd Bergmann wrote:
> On Tuesday 29 January 2013, Thomas Petazzoni wrote:
> > And the Linux PCI resource allocation code complies with this, so
> > that if I have two PCI-to-PCI bridges (each having downstream a
> > device with an I/O BAR), then the first PCI-to-PCI bridge gets its
> > I/O base address register set to ADDR + 0x0, and the second bridge
> > gets its I/O base address set to ADDR + 0x1000. And this doesn't
> > play well with the requirements of Marvell address decoding windows
> > for PCIe I/O regions, which must be 64 KB aligned.
> 
> But we normally only assign a 64 KB I/O window to each PCI host
> bridge. Requiring PCI bridges to be space 64 KB apart would mean that
> we cannot actually support bridges at all.
> 
> Is this just about your "virtual" bridges? If each one has its
> own 64 KB I/O range and its own configuration space, that sounds
> a lot like you should make them appear as individual domains instead.
Yes, it is about the emulated PCI-to-PCI bridges. Each
emulated PCI-to-PCI bridge corresponds to one hardware PCIe interface,
and I need the I/O base address assigned to each PCIe interface to be
aligned on a 64 KB boundary. I am not sure I understand why you think
this is a problem.
Also, what do you mean exactly by making them appear as individual
domains?
Remember that the very reason to use emulated PCI-to-PCI bridges is
that we want to assign a global range of addresses of I/O regions and a
global range of addresses of memory regions, and let the Linux PCI core
allocate from those two ranges to the different devices connected
downstream of the PCI-to-PCI bridges. This gives us for free the rather
complex allocation of addresses we need to set up our address decoding
windows.
If we have separate domains for each of our hardware PCIe
interfaces, can we still benefit from this allocation of resources from
a globally defined range of I/O addresses and memory addresses?
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 17:09           ` Thomas Petazzoni
@ 2013-01-29 20:15             ` Arnd Bergmann
  2013-01-29 20:33               ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-29 20:15 UTC (permalink / raw)
  To: linux-arm-kernel
On Tuesday 29 January 2013, Thomas Petazzoni wrote:
> Yes, it is about the emulated PCI-to-PCI bridges. Each
> emulated PCI-to-PCI bridge corresponds to one hardware PCIe interface,
> and I need the I/O base address assigned to each PCIe interface to be
> aligned on a 64 KB boundary. I am not sure to understand why you think
> this is a problem.
> 
> Also, what do you mean exactly by making them appear as individual
> domains?
I mean you could make each root port look like a separate host
bridge that is not related to the others, and not have any
emulated PCI-to-PCI bridges at all.
> Remember that the very reason to use emulated PCI-to-PCI bridges is
> that we want to assign a global range of addresses of I/O regions and a
> global range of addresses of memory regions, and let the Linux PCI core
> allocate from those two ranges to the different devices connected
> downstream of the PCI-to-PCI bridges. This gives us for free the rather
> complex allocation of addresses we need to set up our address decoding
> windows.
> 
> If we have have separate domains for each of our hardware PCIe
> interface, can we still benefit from this allocation of resources from
> a globally defined range of I/O addresses and memory addresses?
My interpretation of what you told me in the previous mail is that
each root port has 
* A separate configuration space
* A separate 64KB I/O window that is not shared with the other ports,
  or potentially multiple 64KB windows, which we would not want to use
* A configurable range of the memory space that does not overlap
  with the other ports
Is the above a correct description?
If so, I think it would be most sensible to not try to put all ports
into the same domain, but give each port the full view of its own
256 buses, and 64KB I/O space. The memory space can still be directly
mapped, if you only set up the physical address window for that after
the bus scan is complete.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 20:15             ` Arnd Bergmann
@ 2013-01-29 20:33               ` Thomas Petazzoni
  2013-01-29 21:59                 ` Thomas Petazzoni
  2013-01-29 22:54                 ` Arnd Bergmann
  0 siblings, 2 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 20:33 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Arnd Bergmann,
On Tue, 29 Jan 2013 20:15:21 +0000, Arnd Bergmann wrote:
> I mean you could make each root port look like a separate host
> bridge that is not related to the others, and not have any
> emulated PCI-to-PCI bridges at all.
Ok.
> > Remember that the very reason to use emulated PCI-to-PCI bridges is
> > that we want to assign a global range of addresses of I/O regions
> > and a global range of addresses of memory regions, and let the
> > Linux PCI core allocate from those two ranges to the different
> > devices connected downstream of the PCI-to-PCI bridges. This gives
> > us for free the rather complex allocation of addresses we need to
> > set up our address decoding windows.
> > 
> > If we have have separate domains for each of our hardware PCIe
> > interface, can we still benefit from this allocation of resources
> > from a globally defined range of I/O addresses and memory addresses?
> 
> My interpretation of what you told me in the previous mail is that
> each root port has 
> 
> * A separate configuration space
> * A separate 64KB I/O window that is not shared with the other ports,
>   or potentially multiple 64KB windows, which we would not want to use
> * A configurable range of the memory space that does not overlap
>   with the other ports
> 
> Is the above a correct description?
> 
> If so, I think it would be most sensible to not try to put all ports
> into the same domain, but give each port the full view of its own
> 256 buses, and 64KB I/O space. The memory space can still be directly
> mapped, if you only set up the physical address window for that after
> the bus scan is complete.
Does this still allow me to give the Linux PCI core *one* global range of
addresses for I/O space, and *one* global range of addresses for memory
space, and have the Linux PCI core assign ranges, within those global
ranges, to each host bridge?
This is absolutely essential for me, as I then read those allocated
ranges to configure the address decoding windows.
Basically, I have currently two suggestions:
 * From Jason Gunthorpe, to not use any host bridge, and instead use
   only PCI-to-PCI bridges, one per PCIe interface.
 * From you, to not use any PCI-to-PCI bridge, and use only host
   bridges, one per PCIe interface.
Would it be possible to get some consensus on this? In the review of
RFCv1, I was already told to use one global host bridge, and then one
PCI-to-PCI bridge per PCIe interface, and now we're talking about doing
something different. I'd like to avoid having to try gazillions of
different possible implementations :-)
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 20:33               ` Thomas Petazzoni
@ 2013-01-29 21:59                 ` Thomas Petazzoni
  2013-01-29 22:17                   ` Stephen Warren
  2013-01-30  4:49                   ` Jason Gunthorpe
  2013-01-29 22:54                 ` Arnd Bergmann
  1 sibling, 2 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 21:59 UTC (permalink / raw)
  To: linux-arm-kernel
Arnd,
On Tue, 29 Jan 2013 21:33:08 +0100, Thomas Petazzoni wrote:
> Basically, I have currently two suggestions:
> 
>  * From Jason Gunthorpe, to not use any host bridge, and instead use
>    only PCI-to-PCI bridges, one per PCIe interface.
> 
>  * From you, to not use any PCI-to-PCI bridge, and use only host
>    bridges, one per PCIe interface.
Thinking more about this, this solution (using one emulated host bridge
per PCIe interface) would cause one problem: the PCIe device itself
would no longer be in slot 0.
If I'm correct, with one host bridge per PCIe interface, we would have
the following topology:
 bus 0, slot 0: emulated host bridge 0
 bus 0, slot 1: PCIe device connected to PCIe interface 0
 bus 1, slot 0: emulated host bridge 1
 bus 1, slot 1: PCIe device connected to PCIe interface 1
 bus 2, slot 0: emulated host bridge 2
 bus 2, slot 1: PCIe device connected to PCIE interface 2
 etc.
However, one of the reasons to use a PCI-to-PCI bridge was to ensure
that the PCIe devices were all listed in slot 0. According to the
Marvell engineers who work on the PCIe stuff, some new PCIe devices
have this requirement. I don't have a lot of details about this, but I
was told that most of the new Intel NICs require this, for example the
Intel X520 fiber NIC. Maybe PCIe experts (Jason?) could provide more
details about this, and confirm or refute this statement.
Using a PCI-to-PCI bridge allows each PCIe device to be on its
own bus, at slot 0, which also solves this problem.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 21:59                 ` Thomas Petazzoni
@ 2013-01-29 22:17                   ` Stephen Warren
  2013-01-30  4:49                   ` Jason Gunthorpe
  1 sibling, 0 replies; 216+ messages in thread
From: Stephen Warren @ 2013-01-29 22:17 UTC (permalink / raw)
  To: linux-arm-kernel
On 01/29/2013 02:59 PM, Thomas Petazzoni wrote:
> Arnd,
> 
> On Tue, 29 Jan 2013 21:33:08 +0100, Thomas Petazzoni wrote:
> 
>> Basically, I have currently two suggestions:
>>
>>  * From Jason Gunthorpe, to not use any host bridge, and instead use
>>    only PCI-to-PCI bridges, one per PCIe interface.
>>
>>  * From you, to not use any PCI-to-PCI bridge, and use only host
>>    bridges, one per PCIe interface.
> 
> Thinking more about this, this solution (using one emulated host bridge
> per PCIe interface) would cause one problem: the PCIe device itself
> would no longer be in slot 0.
I think that's device 0 not slot 0 right?
> If I'm correct, with one host bridge per PCIe interface, we would have
> the following topology:
> 
>  bus 0, slot 0: emulated host bridge 0
>  bus 0, slot 1: PCIe device connected to PCIe interface 0
I /think/ the bus that the root port itself is on is different from the
bus that the downstream device is on, so wouldn't you end up with:
bus 0, slot 0: emulated host bridge 0
bus 1, slot 0: PCIe device connected to PCIe interface 0
(and isn't that "root port" not "host bridge" in the first line above?)
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 21:59                 ` Thomas Petazzoni
  2013-01-29 22:17                   ` Stephen Warren
@ 2013-01-30  4:49                   ` Jason Gunthorpe
  1 sibling, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-30  4:49 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 10:59:32PM +0100, Thomas Petazzoni wrote:
> Arnd,
> 
> On Tue, 29 Jan 2013 21:33:08 +0100, Thomas Petazzoni wrote:
> 
> > Basically, I have currently two suggestions:
> > 
> >  * From Jason Gunthorpe, to not use any host bridge, and instead use
> >    only PCI-to-PCI bridges, one per PCIe interface.
> > 
> >  * From you, to not use any PCI-to-PCI bridge, and use only host
> >    bridges, one per PCIe interface.
Arnd is suggesting to use multiple *linux* host bridges (ie host
drivers), there is never any need for a 'host bridge config space' as
in patch #7, in either case.
> However, one of the reason to use a PCI-to-PCI bridge was to ensure
> that the PCIe devices were all listed in slot 0. According to the
> Marvell engineers who work on the PCIe stuff, some new PCIe devices
> have this requirement. I don't have a lot of details about this, but I
> was told that most of the new Intel NICs require this, for example the
> Intel X520 fiber NIC. Maybe PCIe experts (Jason?) could provide more
> details about this, and confirm/infirm this statement.
I'm not sure what this is referring to.. I don't recall any specific
requirements in PCI-E for the device number, I think the spec requires
it to be learned based on the config TLPs received.
There might be a device number sensitivity in INTx translation, but
that is defined by the spec.
That said, if your root complex is PCI-E compliant then all downstream
end ports attached to a root port should have a device number of 0.
> The usage of PCI-to-PCI bridge allows to have each PCIe device on its
> own bus, at slot 0, which also solves this problem.
Hrm....
Looking at the docs, you will also need to change the internal device
number (probably reg 41a04 again) to something other than 0, otherwise
the Marvell itself will claim device number 0 and the downstream end
port will be device number 1. You should see this happen today??
You should set the Marvell internal device number to something like
all ones and then deny any Linux config register access to the all
ones device number on the subordinate bus to hide the Marvell end port
config space registers from Linux.
As far as this process goes, it doesn't matter which approach you
take. If you use multiple PCI domains then you'd still be able to
arrange things via the above so that the downstream device could
always claim device number 0.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 20:33               ` Thomas Petazzoni
  2013-01-29 21:59                 ` Thomas Petazzoni
@ 2013-01-29 22:54                 ` Arnd Bergmann
  2013-01-30  4:21                   ` Jason Gunthorpe
  1 sibling, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-29 22:54 UTC (permalink / raw)
  To: linux-arm-kernel
On Tuesday 29 January 2013, Thomas Petazzoni wrote:
> Does this still allows me to give the Linux PCI one global range of
> addresses for I/O space, and one global range of addresses for memory
> space, and the the Linux PCI core assign ranges, within those global
> ranges, to each host bridge?
> 
> This is absolutely essential for me, as I then read those allocated
> ranges to configure the address decoding windows.
> 
> Basically, I have currently two suggestions:
> 
>  * From Jason Gunthorpe, to not use any host bridge, and instead use
>    only PCI-to-PCI bridges, one per PCIe interface.
> 
>  * From you, to not use any PCI-to-PCI bridge, and use only host
>    bridges, one per PCIe interface.
> 
> Would it be possible to get some consensus on this? In the review of
> RFCv1, I was already told to use one global host bridge, and then one
> PCI-to-PCI bridge per PCIe interface, and now we're talking about doing
> something different. I'd like to avoid having to try gazillions of
> different possible implementations :-)
I'm actually fine with either of the two suggestions you mentioned above,
whichever is easier to implement and/or more closely matches what the
hardware actually implements is better IMHO.
The part that I did not like about having emulated PCI-to-PCI bridges
is that it seems to just work around a (perceived or real) limitation
in the Linux kernel by adding a piece of infrastructure, rather than
lifting that limitation by making the kernel deal with what the
hardware provides. That reminded me of the original mach-vt8500
PCI implementation that faked a complete PCI host bridge and a
bunch of PCI devices on it, in order to use the via-velocity
ethernet controller, instead of adding a simple 'platform_driver'
struct to that driver.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 22:54                 ` Arnd Bergmann
@ 2013-01-30  4:21                   ` Jason Gunthorpe
  2013-01-30  9:55                     ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-30  4:21 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 10:54:00PM +0000, Arnd Bergmann wrote:
> I'm actually fine with either of the two suggestions you mentioned above,
> whichever is easier to implement and/or more closely matches what the
> hardware actually implements is better IMHO.
> 
> The part that I did not like about having emulated PCI-to-PCI bridges
> is that it seems to just work around a (percieved or real) limitation
> in the Linux kernel by adding a piece of infrastructure, rather than
> lifting that limitation by making the kernel deal with what the
> hardware provides. That reminded me of the original mach-vt8500
Well.. in this case there is a standard - PCI-E for what HW vendors
are supposed to do. The kernel core code follows it and works with
compliant hardware.
Marvell HW is not compliant.
So..
Should the kernel core PCI code support this particular non-compliance?
Should the driver work around the non-compliance and present a
compliant interface to the kernel and userspace?
My take is the kernel core PCI code is fine, and I hope
this will be an isolated issue with one family of Marvell IP. So
working around the HW problem in the driver seems best.
If we learn of many more instances like this then, yah, update the
core code and rip out this driver work around...
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30  4:21                   ` Jason Gunthorpe
@ 2013-01-30  9:55                     ` Arnd Bergmann
  2013-01-30 11:47                       ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-30  9:55 UTC (permalink / raw)
  To: linux-arm-kernel
On Wednesday 30 January 2013, Jason Gunthorpe wrote:
> On Tue, Jan 29, 2013 at 10:54:00PM +0000, Arnd Bergmann wrote:
> > The part that I did not like about having emulated PCI-to-PCI bridges
> > is that it seems to just work around a (percieved or real) limitation
> > in the Linux kernel by adding a piece of infrastructure, rather than
> > lifting that limitation by making the kernel deal with what the
> > hardware provides. That reminded me of the original mach-vt8500
> 
> Well.. in this case there is a standard - PCI-E for what HW vendors
> are supposed to do. The kernel core code follows it and works with
> compliant hardware.
> 
> Marvell HW is not compliant.
> 
> So..
> 
> Should the kernel core PCI code support this particular non-compliance?
> Should the driver work around the non-compliance and present a
> compliant interface to the kernel and userspace?
> 
> My take is the kernel core PCI code is fine, and I hope
> this will be an isolated issue with one family of Marvell IP. So
> working around the HW problem in the driver seems best.
I don't remember the kernel ever caring about whether hardware complies
to a standard or not. The kernel's job is to make hardware work, based
on the actual implementation of that hardware. In a lot of cases that
means taking the standard document as a reference, and adding quirks
for the devices that are different.
In the end, it comes down to the impact on the code complexity, and
the run-time overhead for whatever hardware is most common when adding
those quirks.
Can you (or someone else) describe what kind of changes to the core
code we would actually need to make it work without emulating the
bridge?
> If we learn of many more instances like this then, yah, update the
> core code and rip out this driver work around...
But the code was specifically written to be reusable, which is normally
a good thing.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30  9:55                     ` Arnd Bergmann
@ 2013-01-30 11:47                       ` Thomas Petazzoni
  2013-01-30 16:17                         ` Arnd Bergmann
  2013-01-30 20:48                         ` Bjorn Helgaas
  0 siblings, 2 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-30 11:47 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Arnd Bergmann,
On Wed, 30 Jan 2013 09:55:49 +0000, Arnd Bergmann wrote:
> I don't remember the kernel ever caring about whether hardware complies
> to a standard or not. The kernel's job is to make hardware work, based
> on the actual implementation of that hardware. In a lot of cases that
> means taking the standard document as a reference, and adding quirks
> for the devices that are different.
> 
> In the end, it comes down to the impact on the code complexity, and
> the run-time overhead for whatever hardware is most common when adding
> those quirks.
This is not only about standards, it is also about re-using the PCI
resource allocation code.
In my RFCv1, sent December, 7th, I wasn't using any emulated PCI-to-PCI
bridge. So it *can* perfectly work without it.
However, one major drawback of my RFCv1 version is that since I didn't
know how much I/O space and memory space was needed for each PCIe
device, I had to oversize the address decoding windows. And also, I had
to have a special allocator (certainly simple, but still) to find an
available physical address to set up each address decoding window.
Emulating a PCI-to-PCI bridge very nicely allows us to re-use the PCI core
resource allocation code. I think it's really the main reason for
emulating those PCI-to-PCI bridges, rather than wanting to comply with
some standards.
So what I'm going to do now is rework my patch series by removing the
emulated host bridge (which is normally mandatory by PCIe standard, but
Linux doesn't need it, so we don't care), but I'll keep the emulated
PCI-to-PCI bridges in order to benefit from the PCI core resource
allocation mechanisms.
Is this ok for you?
I'd like to settle on the strategy to follow, because we're really
going down a funny road here: on December 7th, I submit a series that
doesn't use any PCI-to-PCI bridge, and I'm told that I should emulate
some. I spent a long time working on an implementation that uses
emulated PCI-to-PCI bridges, which I submitted on Monday, now to be
told that I should work really hard not to use PCI-to-PCI bridges. I
hope you can feel my little embarrassment here...
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30 11:47                       ` Thomas Petazzoni
@ 2013-01-30 16:17                         ` Arnd Bergmann
  2013-01-30 16:38                           ` Thomas Petazzoni
  2013-01-30 20:48                         ` Bjorn Helgaas
  1 sibling, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-30 16:17 UTC (permalink / raw)
  To: linux-arm-kernel
On Wednesday 30 January 2013, Thomas Petazzoni wrote:
> Dear Arnd Bergmann,
> 
> On Wed, 30 Jan 2013 09:55:49 +0000, Arnd Bergmann wrote:
> 
> > I don't remember the kernel ever caring about whether hardware complies
> > to a standard or not. The kernel's job is to make hardware work, based
> > on the actual implementation of that hardware. In a lot of cases that
> > means taking the standard document as a reference, and adding quirks
> > for the devices that are different.
> > 
> > In the end, it comes down to the impact on the code complexity, and
> > the run-time overhead for whatever hardware is most common when adding
> > those quirks.
> 
> This is not only about standards, it is also about re-using the PCI
> resource allocation code.
> 
> In my RFCv1, sent December, 7th, I wasn't using any emulated PCI-to-PCI
> bridge. So it *can* perfectly work without it.
Ok, I see.
> However, one major drawback of my RFCv1 version is that since I didn't
> know how much I/O space and memory space was needed for each PCIe
> device, I had to oversize the address decoding windows. And also, I had
> to have a special allocator (certainly simple, but still) to find an
> available physical address to set up each address decoding window.
Well, for the I/O space, there is no oversizing because either way you
end up with exactly 64KB per root port, right?
> Emulating a PCI-to-PCI bridge very nicely allows to re-use the PCI core
> resource allocation code. I think it's really the main reason for
> emulated those PCI-to-PCI bridges, rather than willing to comply to
> some standards.
> 
> So what I'm going to do now is rework my patch series by removing the
> emulated host bridge (which is normally mandatory by PCIe standard, but
> Linux doesn't need it, so we don't care), but I'll keep the emulated
> PCI-to-PCI bridges in order to benefit for the PCI core resource
> allocation mechanisms.
> 
> Is this ok for you?
Using the Linux allocator for memory resources does sound useful,
so if that requires using the emulated PCI-to-PCI bridges, I guess
it's the best compromise.
> I'd like to settle on the strategy to follow, because we're really
> going a funny road here: on December 7th, I submit a series that
> doesn't use any PCI-to-PCI bridge, and I'm told that I should emulate
> some. I spent a long time working on an implementation that uses
> emumlated PCI-to-PCI bridges, which I submitted on Monday, now to be
> told that I should work really hard not to use PCI-to-PCI bridges. I
> hope you can feel my little embarrassment here...
I'm sorry about this situation. Unfortunately the way that such decisions
are made is not always straightforward, and what seems like a good idea
at one point turns out to be a mistake or more complex than anticipated
later. With the description of the first patch set, I did not think
it would be necessary to fake a bridge device and its config space.
What I had guessed you could do was to call pci_scan_root_bus on
each of the ports, and then set up the memory space window for
the bus including all of its child devices.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30 16:17                         ` Arnd Bergmann
@ 2013-01-30 16:38                           ` Thomas Petazzoni
  0 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-30 16:38 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Arnd Bergmann,
On Wed, 30 Jan 2013 16:17:38 +0000, Arnd Bergmann wrote:
> > However, one major drawback of my RFCv1 version is that since I didn't
> > know how much I/O space and memory space was needed for each PCIe
> > device, I had to oversize the address decoding windows. And also, I had
> > to have a special allocator (certainly simple, but still) to find an
> > available physical address to set up each address decoding window.
> 
> Well, for the I/O space, there is no oversizing because either way you
> end up with exactly 64KB per root port, right?
Correct, I/O space is not an issue, of course. Only the memory windows
are an issue (in terms of quantity of address space used).
That said, the PCI-to-PCI bridge solution doesn't solve the fact that
I/O addresses get assigned even though the driver will most likely not
use them. This means that one I/O window is consumed for each PCIe
interface, even though it is not being used in practice. And see I have
10 PCIe interfaces in this SoC, and only 20 windows available globally
(not only for PCIe, but also for NAND, NOR, etc.). But for now, I'd
like to leave this potential problem on the side, and get something
working. If it seems useful to remove this problem later, we'll work on
it.
> > Emulating a PCI-to-PCI bridge very nicely allows to re-use the PCI core
> > resource allocation code. I think it's really the main reason for
> > emulating those PCI-to-PCI bridges, rather than willing to comply to
> > some standards.
> > 
> > So what I'm going to do now is rework my patch series by removing the
> > emulated host bridge (which is normally mandatory by PCIe standard, but
> > Linux doesn't need it, so we don't care), but I'll keep the emulated
> > PCI-to-PCI bridges in order to benefit from the PCI core resource
> > allocation mechanisms.
> > 
> > Is this ok for you?
> 
> Using the Linux allocator for memory resources does sound useful,
> so if that requires using the emulated PCI-to-PCI bridges, I guess
> it's the best compromise.
Yes, that was Jason's original idea when he suggested to use PCI-to-PCI
bridges. And when I did the implementation, it really worked nicely.
And this PCI-to-PCI bridge emulation stuff is really not a big deal,
look at drivers/pci/sw-pci-pci-bridge.c: 185 lines in total, including
10 lines of comment header at the top.
> I'm sorry about this situation. Unfortunately the way that such decisions
> are made is not always straightforward, and what seems like a good idea
> at one point turns out to be a mistake or more complex than anticipated
> later. With the description of the first patch set, I did not think
> it would be necessary to fake a bridge device and their config space.
Sure, I understand this. I guess you also understand my slight
frustration when I propose A, I'm told to do B, I propose B, and I'm
then suggested to do A again :-) But I agree, it's part of the
technical discussion, and we can't get it right on the first shot.
> What I had guessed you could do was to call pci_scan_root_bus on
> each of the ports, and then set up the memory space window for
> the bus including all of its child devices.
But where would I read how much space is needed for the I/O and memory
regions of each bus?
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30 11:47                       ` Thomas Petazzoni
  2013-01-30 16:17                         ` Arnd Bergmann
@ 2013-01-30 20:48                         ` Bjorn Helgaas
  2013-01-30 21:06                           ` Jason Gunthorpe
  1 sibling, 1 reply; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-30 20:48 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Jan 30, 2013 at 4:47 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> So what I'm going to do now is rework my patch series by removing the
> emulated host bridge (which is normally mandatory by PCIe standard, but
> Linux doesn't need it, so we don't care), ...
This is a tangent since you're removing the emulated host bridge
anyway, but it's been mentioned a couple of times, and I'd like to
understand this.  Jason mentioned earlier in the [07/27] emulated host
bridge thread that the PCIe spec requires a host bridge at 00:00.0.
I've never seen that mentioned in the spec; can somebody point me to
the actual requirement that host bridges appear in config space?
My understanding has been that host bridges, whether PCI or PCIe, are
required to *exist*, but that the way you enumerate them and configure
them is outside the scope of the PCI/PCIe specs.  I know that many
chips, especially for x86, *do* make the host bridge appear in config
space, but I've never seen a requirement for that.
Bjorn
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30 20:48                         ` Bjorn Helgaas
@ 2013-01-30 21:06                           ` Jason Gunthorpe
  0 siblings, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-30 21:06 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Jan 30, 2013 at 01:48:33PM -0700, Bjorn Helgaas wrote:
> This is a tangent since you're removing the emulated host bridge
> anyway, but it's been mentioned a couple of times, and I'd like to
> understand this.  Jason mentioned earlier in the [07/27] emulated host
> bridge thread that the PCIe spec requires a host bridge at 00:00.0.
> I've never seen that mentioned in the spec; can somebody point me to
> the actual requirement that host bridges appear in config space?
Hum, a more careful search/reading brings up this:
7.2.2.1. Host Bridge Requirements
 [...] The use of Host Bridge PCI class code is reserved for backwards
 compatibility; host Bridge configuration space is opaque to standard
 PCI Express software and may be implemented in an implementation
 specific manner that is compatible with PCI Host Bridge Type 0
 configuration space. A PCI Express Host Bridge is not required to
 signal errors through a Root Complex Event Collector. This support is
 optional for PCI Express Host Bridges.
So, if it is present it is required to be compatible with the 'PCI
Host Bridge' stuff, but it is not mandatory.
My bad, I believe I got also confused with the spec language regarding
a 'host bridge' vs a 'host bridge configuration space'
Regards,
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
 
 
 
 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-29 16:45         ` Arnd Bergmann
  2013-01-29 17:09           ` Thomas Petazzoni
@ 2013-01-30  4:56           ` Jason Gunthorpe
  2013-01-30  8:19             ` Thomas Petazzoni
  2013-01-30  9:46             ` Arnd Bergmann
  1 sibling, 2 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-30  4:56 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 04:45:07PM +0000, Arnd Bergmann wrote:
> On Tuesday 29 January 2013, Thomas Petazzoni wrote:
> > And the Linux PCI resource allocation code complies with this, so that
> > if I have two PCI-to-PCI bridges (each having downstream a device with
> > an I/O BAR), then the first PCI-to-PCI bridge gets its I/O base address
> > register set to ADDR + 0x0, and the second bridge gets its I/O base
> > address set to ADDR + 0x1000. And this doesn't play well with the
> > requirements of Marvell address decoding windows for PCIe I/O regions,
> > which must be 64 KB aligned.
> 
> But we normally only assign a 64 KB I/O window to each PCI host bridge.
> Requiring PCI bridges to be spaced 64 KB apart would mean that we cannot
> actually support bridges at all.
The PCI resource code uses full 32 bit integers when it handles IO
addresses, so this actually does sort of work out.
However, Thomas how did you recover the high bits of the
IO window address from the bridge configuration? Are you reading the
struct resource directly? That probably causes problems with
hotplug/etc...
If you look back in your old emails I outlined a solution to this
using the MMU.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30  4:56           ` Jason Gunthorpe
@ 2013-01-30  8:19             ` Thomas Petazzoni
  2013-01-30  9:46             ` Arnd Bergmann
  1 sibling, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-30  8:19 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
On Tue, 29 Jan 2013 21:56:55 -0700, Jason Gunthorpe wrote:
> However, Thomas how did you recover the high bits of the
> IO window address from the bridge configuration? Are you reading the
> struct resource directly? That probably causes problems with
> hotplug/etc...
The PCI-to-PCI bridge configuration space has a register with the high
bits of the I/O window address. If you look at the PCI-to-PCI emulation
code, I set the bit that says "I'm a bridge capable of 32 bits
addressing of I/O addresses", and then when setting up the windows, I
reconstruct the full 32 bits address by reading the two I/O address
registers.
See 3.2.5.6 in the PCI-to-PCI bridge specification:
  If the low four bits of the I/O Base and I/O Limit registers are 01h,
  then the bridge supports 32-bit I/O address decoding, and the I/O
  Base Upper 16 Bits and the I/O Limit Upper 16 Bits hold the upper 16
  bits, corresponding to AD[31::16], of the 32-bit I/O Base and I/O
  Limit addresses respectively. In this case, system configuration
  software is permitted to locate the I/O address range supported by
  the bridge anywhere in the 4-GB I/O Space. Note that the 4-KB alignment and
  granularity restrictions still apply when the bridge supports 32-bit
  I/O addressing.
(And my code does ensure that the low four bits of the I/O Base and I/O
Limit registers are 01h)
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30  4:56           ` Jason Gunthorpe
  2013-01-30  8:19             ` Thomas Petazzoni
@ 2013-01-30  9:46             ` Arnd Bergmann
  2013-01-30  9:54               ` Thomas Petazzoni
  1 sibling, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-30  9:46 UTC (permalink / raw)
  To: linux-arm-kernel
On Wednesday 30 January 2013, Jason Gunthorpe wrote:
> > But we normally only assign a 64 KB I/O window to each PCI host bridge.
> > Requiring PCI bridges to be spaced 64 KB apart would mean that we cannot
> > actually support bridges at all.
> 
> The PCI resource code uses full 32 bit integers when it handles IO
> addresses, so this actually does sort of work out.
However, we only reserve 1 MB (I think) virtual address window for all
I/O spaces of all PCI domains combined, at a fixed location (0xfee00000).
This means we can have at most 16 such windows at run-time. That can
be changed if necessary, but it seems like overkill when in practice
you only need a few bytes at most.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30  9:46             ` Arnd Bergmann
@ 2013-01-30  9:54               ` Thomas Petazzoni
  2013-01-30 10:03                 ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-30  9:54 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Arnd Bergmann,
On Wed, 30 Jan 2013 09:46:53 +0000, Arnd Bergmann wrote:
> On Wednesday 30 January 2013, Jason Gunthorpe wrote:
> > > But we normally only assign a 64 KB I/O window to each PCI host bridge.
> > > Requiring PCI bridges to be spaced 64 KB apart would mean that we cannot
> > > actually support bridges at all.
> > 
> > The PCI resource code uses full 32 bit integers when it handles IO
> > addresses, so this actually does sort of work out.
> 
> However, we only reserve 1 MB (I think) virtual address window for all
> I/O spaces of all PCI domains combined, at a fixed location (0xfee00000).
> This means we can have at most 16 such windows at run-time. That can
> be changed if necessary, but it seems like overkill when in practice
> you only need a few bytes at most.
I am not sure where this 0xfee00000 address comes from, but in my case
(and I think in the Tegra PCI driver as well), we tell the Linux PCI
core from which addresses the I/O ranges should be allocated. In my DT,
I have:
                        ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
                                  0x00004800 0 0xd0042000 0xd0042000 0 0x00002000   /* port 2.0 registers */
                                  0x00001000 0 0xd0044000 0xd0044000 0 0x00002000   /* port 0.1 registers */
                                  0x00001800 0 0xd0048000 0xd0048000 0 0x00002000   /* port 0.2 registers */
                                  0x00002000 0 0xd004C000 0xd004C000 0 0x00002000   /* port 0.3 registers */
                                  0x00002800 0 0xd0080000 0xd0080000 0 0x00002000   /* port 1.0 registers */
                                  0x00005000 0 0xd0082000 0xd0082000 0 0x00002000   /* port 3.0 registers */
                                  0x00003000 0 0xd0084000 0xd0084000 0 0x00002000   /* port 1.1 registers */
                                  0x00003800 0 0xd0088000 0xd0088000 0 0x00002000   /* port 1.2 registers */
                                  0x00004000 0 0xd008C000 0xd008C000 0 0x00002000   /* port 1.3 registers */
                                  0x81000000 0 0          0xc0000000 0 0x00100000   /* downstream I/O */
                                  0x82000000 0 0          0xc1000000 0 0x08000000>; /* non-prefetchable memory */
And then, the Marvell PCI driver gets the "downstream I/O" range,
parses it into a "struct resource", and then does (where &pcie->io is
the struct resource into which we parsed the "downstream I/O" range):
        pci_add_resource_offset(&sys->resources, &pcie->io, sys->io_offset);
	[...]
	pci_ioremap_io(nr * SZ_64K, pcie->io.start);
And it works just fine, I get my I/O ranges allocated at 0xc0000000 for
the first device, 0xc0010000 (i.e base address + 64KB) for the second
device, etc.
The Tegra PCI driver does exactly the same (I shamelessly copied what
Thierry has done).
I somehow have the feeling that we are looking for problems that simply
don't exist...
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30  9:54               ` Thomas Petazzoni
@ 2013-01-30 10:03                 ` Arnd Bergmann
  2013-01-30 11:42                   ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-30 10:03 UTC (permalink / raw)
  To: linux-arm-kernel
On Wednesday 30 January 2013, Thomas Petazzoni wrote:
> I am not sure where this 0xfee00000 address comes from, but in my case
> (and I think in the Tegra PCI driver as well), we tell the Linux PCI
> core from which addresses the I/O ranges should be allocated. In my DT,
> I have:
> 
>                         ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
>                                   0x00004800 0 0xd0042000 0xd0042000 0 0x00002000   /* port 2.0 registers */
>                                   0x00001000 0 0xd0044000 0xd0044000 0 0x00002000   /* port 0.1 registers */
>                                   0x00001800 0 0xd0048000 0xd0048000 0 0x00002000   /* port 0.2 registers */
>                                   0x00002000 0 0xd004C000 0xd004C000 0 0x00002000   /* port 0.3 registers */
>                                   0x00002800 0 0xd0080000 0xd0080000 0 0x00002000   /* port 1.0 registers */
>                                   0x00005000 0 0xd0082000 0xd0082000 0 0x00002000   /* port 3.0 registers */
>                                   0x00003000 0 0xd0084000 0xd0084000 0 0x00002000   /* port 1.1 registers */
>                                   0x00003800 0 0xd0088000 0xd0088000 0 0x00002000   /* port 1.2 registers */
>                                   0x00004000 0 0xd008C000 0xd008C000 0 0x00002000   /* port 1.3 registers */
>                                   0x81000000 0 0          0xc0000000 0 0x00100000   /* downstream I/O */
>                                   0x82000000 0 0          0xc1000000 0 0x08000000>; /* non-prefetchable memory */
> 
> And then, the Marvell PCI driver gets the "downstream I/O" range,
> parses it into a "struct resource", and then does (where &pcie->io is
> the struct resource into which we parsed the "downstream I/O" range):
> 
>         pci_add_resource_offset(&sys->resources, &pcie->io, sys->io_offset);
> 	[...]
> 	pci_ioremap_io(nr * SZ_64K, pcie->io.start);
0xfee00000 is the platform independent virtual address that pci_ioremap_io
maps your platform specific physical address (from pcie->io.start) to. It's
defined (in the kernel I am looking at) in asm/io.h as
#define PCI_IO_VIRT_BASE        0xfee00000
and used by pci_ioremap_io as
        return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
                                  PCI_IO_VIRT_BASE + offset + SZ_64K,
                                  phys_addr,
                                  __pgprot(get_mem_type(MT_DEVICE)->prot_pte));
> And it works just fine, I get my I/O ranges allocated at 0xc0000000 for
> the first device, 0xc0010000 (i.e base address + 64KB) for the second
> device, etc.
(void*)0xc0000000 is the normal PAGE_OFFSET. If you map your I/O space there,
you are in big trouble because that is supposed to have the start of your
physical memory mapping.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 05/27] arm: pci: add a align_resource hook
  2013-01-30 10:03                 ` Arnd Bergmann
@ 2013-01-30 11:42                   ` Thomas Petazzoni
  0 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-30 11:42 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Arnd Bergmann,
On Wed, 30 Jan 2013 10:03:43 +0000, Arnd Bergmann wrote:
> 0xfee00000 is the platform independent virtual address that pci_ioremap_io
> maps your platform specific physical address (from pcie->io.start) to. It's
> defined (in the kernel I am looking at) in asm/io.h as
> 
> #define PCI_IO_VIRT_BASE        0xfee00000
> 
> and used by pci_ioremap_io as
> 
>         return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
>                                   PCI_IO_VIRT_BASE + offset + SZ_64K,
>                                   phys_addr,
>                                   __pgprot(get_mem_type(MT_DEVICE)->prot_pte));
> 
> 
> > And it works just fine, I get my I/O ranges allocated at 0xc0000000 for
> > the first device, 0xc0010000 (i.e base address + 64KB) for the second
> > device, etc.
> 
> (void*)0xc0000000 is the normal PAGE_OFFSET. If you map your I/O space there,
> you are in big trouble because that is supposed to have the start of your
> physical memory mapping.
Aaah, I know where the confusion comes from. You are talking about
virtual addresses, while I am talking about physical addresses.
0xC0000000 in my DT is a *physical* address.
Basically, with Marvell SoCs, we have the following behavior:
                     -------                          --------------------
 Virtual address --> | MMU | --> Physical address --> | Address decoding | --> real hardware
                     -------                          --------------------
The MMU is the usual stuff everybody knows about. What's more special
about Marvell SoC is this "Address decoding" thing. Basically, instead
of having the physical address of things fully hardcoded and mentioned
in the datasheets, they are configurable. So for each PCIe interface,
you have to set up an address decoding window for the I/O accesses and
another address decoding window for the memory accesses. And the
physical address associated to each of these "address decoding windows"
can be freely chosen, so they must be "assigned" for each PCIe
interface.
So, my 0xC0000000 is a *physical* address in the diagram above. The
fact that it gets mapped at 0xfee00000 as a virtual address doesn't
really matter for me, I'm just fine with that.
Does that clarify things?
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
 
 
 
 
 
 
 
 
 
- * [PATCH v2 06/27] lib: devres: don't enclose pcim_*() functions in CONFIG_HAS_IOPORT
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (4 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 05/27] arm: pci: add a align_resource hook Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 07/27] PCI: Add software-emulated host bridge Thomas Petazzoni
                   ` (20 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The pcim_*() functions are used by the libata-sff subsystem, and this
subsystem is used for many SATA drivers on ARM platforms that do not
necessarily have I/O ports.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: linux-kernel at vger.kernel.org
---
 lib/devres.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/devres.c b/lib/devres.c
index 80b9c76..5639c3e 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -195,6 +195,7 @@ void devm_ioport_unmap(struct device *dev, void __iomem *addr)
 			       devm_ioport_map_match, (void *)addr));
 }
 EXPORT_SYMBOL(devm_ioport_unmap);
+#endif /* CONFIG_HAS_IOPORT */
 
 #ifdef CONFIG_PCI
 /*
@@ -400,4 +401,3 @@ void pcim_iounmap_regions(struct pci_dev *pdev, int mask)
 }
 EXPORT_SYMBOL(pcim_iounmap_regions);
 #endif /* CONFIG_PCI */
-#endif /* CONFIG_HAS_IOPORT */
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (5 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 06/27] lib: devres: don't enclose pcim_*() functions in CONFIG_HAS_IOPORT Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 20:18   ` Arnd Bergmann
  2013-01-28 18:56 ` [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge Thomas Petazzoni
                   ` (19 subsequent siblings)
  26 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
From: Thierry Reding <thierry.reding@avionic-design.de>
[Thomas Petazzoni:
 - Simplify capabilities handling.
 - Move to a separate file.
 - Fix mask used when writing a 4 bytes value.]
Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/pci/Kconfig          |    3 +
 drivers/pci/Makefile         |    3 +
 drivers/pci/sw-host-bridge.c |  144 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h          |   23 +++++++
 4 files changed, 173 insertions(+)
 create mode 100644 drivers/pci/sw-host-bridge.c
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 6d51aa6..f7548e2 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -119,3 +119,6 @@ config PCI_IOAPIC
 config PCI_LABEL
 	def_bool y if (DMI || ACPI)
 	select NLS
+
+config PCI_SW_HOST_BRIDGE
+	bool
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 0c3efcf..44ce914 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -15,6 +15,9 @@ obj-$(CONFIG_PCIEPORTBUS) += pcie/
 
 obj-$(CONFIG_PCI_IOAPIC) += ioapic.o
 
+# Emulated PCI elements
+obj-$(CONFIG_PCI_SW_HOST_BRIDGE) += sw-host-bridge.o
+
 # Build the PCI Hotplug drivers if we were asked to
 obj-$(CONFIG_HOTPLUG_PCI) += hotplug/
 ifdef CONFIG_HOTPLUG_PCI
diff --git a/drivers/pci/sw-host-bridge.c b/drivers/pci/sw-host-bridge.c
new file mode 100644
index 0000000..b5a2aed
--- /dev/null
+++ b/drivers/pci/sw-host-bridge.c
@@ -0,0 +1,144 @@
+/*
+ * Implementation of a simple emulated PCI host bridge.
+ *
+ * Thierry Reding <thierry.reding@avionic-design.de>
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+
+int pci_sw_host_bridge_init(struct pci_sw_host_bridge *bridge)
+{
+	unsigned int i;
+
+	if (!bridge)
+		return -EINVAL;
+
+	bridge->vendor = 0x0000;
+	bridge->device = 0x0000;
+
+	bridge->command = 0x0000;
+	bridge->status = PCI_STATUS_CAP_LIST;
+
+	bridge->class = PCI_CLASS_BRIDGE_HOST;
+	bridge->interface = 0x00;
+	bridge->revision = 0x00;
+
+	bridge->bist = 0x00;
+	bridge->header_type = PCI_HEADER_TYPE_NORMAL;
+	bridge->latency_timer = 0x00;
+	bridge->cache_line_size = 0x10;
+
+	for (i = 0; i < 6; i++)
+		bridge->bar[i] = 0x00000000;
+
+	bridge->subsystem_vendor = 0x0000;
+	bridge->subsystem_device = 0x0000;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_sw_host_bridge_init);
+
+int pci_sw_host_bridge_read(struct pci_sw_host_bridge *bridge,
+			    unsigned int where, int size, u32 *value)
+{
+	switch (where & ~3) {
+	case PCI_VENDOR_ID:
+		*value = bridge->device << 16 | bridge->vendor;
+		break;
+
+	case PCI_COMMAND:
+		*value = bridge->status << 16 | bridge->command;
+		break;
+
+	case PCI_STATUS:
+		*value = 0;
+		break;
+
+	case PCI_CLASS_REVISION:
+		*value = bridge->class << 16 | bridge->interface << 8 |
+			 bridge->revision;
+		break;
+
+	case PCI_CACHE_LINE_SIZE:
+		*value = bridge->bist << 24 | bridge->header_type << 16 |
+			 bridge->latency_timer << 8 | bridge->cache_line_size;
+		break;
+
+	case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5:
+		*value = bridge->bar[((where & ~3) - PCI_BASE_ADDRESS_0) / 4];
+		break;
+
+	case PCI_CARDBUS_CIS:
+		*value = 0;
+		break;
+
+	case PCI_SUBSYSTEM_VENDOR_ID:
+		*value = bridge->subsystem_device << 16 |
+			 bridge->subsystem_vendor;
+		break;
+
+	case PCI_ROM_ADDRESS:
+		*value = 0;
+		break;
+
+	case PCI_INTERRUPT_LINE:
+		break;
+
+	default:
+		*value = 0xffffffff;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	if (size == 2)
+		*value = (*value >> (8 * (where & 3))) & 0xffff;
+	else if (size == 1)
+		*value = (*value >> (8 * (where & 3))) & 0xff;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_sw_host_bridge_read);
+
+int pci_sw_host_bridge_write(struct pci_sw_host_bridge *bridge,
+			     unsigned int where, int size, u32 value)
+{
+	u32 mask, reg;
+	int err;
+
+	if (size == 4)
+		mask = 0x0;
+	else if (size == 2)
+		mask = ~(0xffff << ((where & 3) * 8));
+	else if (size == 1)
+		mask = ~(0xff << ((where & 3) * 8));
+	else
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	err = pci_sw_host_bridge_read(bridge, where & ~3, 4, &reg);
+	if (err)
+		return err;
+
+	value = (reg & mask) | value << ((where & 3) * 8);
+
+	switch (where & ~3) {
+	case PCI_COMMAND:
+		bridge->command = value & 0xffff;
+		bridge->status = value >> 16;
+		break;
+
+	case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5:
+		bridge->bar[((where & ~3) - PCI_BASE_ADDRESS_0) / 4] = value;
+		break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_sw_host_bridge_write);
+
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 15472d6..c93e258 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1841,4 +1841,27 @@ static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev)
  */
 struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
 
+struct pci_sw_host_bridge {
+	u16 vendor;
+	u16 device;
+	u16 command;
+	u16 status;
+	u16 class;
+	u8 interface;
+	u8 revision;
+	u8 bist;
+	u8 header_type;
+	u8 latency_timer;
+	u8 cache_line_size;
+	u32 bar[6];
+	u16 subsystem_vendor;
+	u16 subsystem_device;
+};
+
+extern int pci_sw_host_bridge_init(struct pci_sw_host_bridge *bridge);
+extern int pci_sw_host_bridge_read(struct pci_sw_host_bridge *bridge,
+				   unsigned int where, int size, u32 *value);
+extern int pci_sw_host_bridge_write(struct pci_sw_host_bridge *bridge,
+				    unsigned int where, int size, u32 value);
+
 #endif /* LINUX_PCI_H */
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 18:56 ` [PATCH v2 07/27] PCI: Add software-emulated host bridge Thomas Petazzoni
@ 2013-01-28 20:18   ` Arnd Bergmann
  2013-01-28 22:03     ` Stephen Warren
  2013-01-28 22:09     ` Thomas Petazzoni
  0 siblings, 2 replies; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-28 20:18 UTC (permalink / raw)
  To: linux-arm-kernel
On Monday 28 January 2013, Thomas Petazzoni wrote:
> From: Thierry Reding <thierry.reding@avionic-design.de>
> 
> [Thomas Petazzoni:
>  - Simplify capabilities handling.
>  - Move to a separate file.
>  - Fix mask used when writing a 4 bytes value.]
> 
> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Not even a description why this is needed?
This patch (together with patch 8) seems like the most controversial
one of the series, so you should better provide a really good reason
why we would emulate something in software rather than using whatever
hardware is there.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 20:18   ` Arnd Bergmann
@ 2013-01-28 22:03     ` Stephen Warren
  2013-01-28 22:09       ` Jason Gunthorpe
  2013-01-28 22:09     ` Thomas Petazzoni
  1 sibling, 1 reply; 216+ messages in thread
From: Stephen Warren @ 2013-01-28 22:03 UTC (permalink / raw)
  To: linux-arm-kernel
On 01/28/2013 01:18 PM, Arnd Bergmann wrote:
> On Monday 28 January 2013, Thomas Petazzoni wrote:
>> From: Thierry Reding <thierry.reding@avionic-design.de>
>>
>> [Thomas Petazzoni:
>>  - Simplify capabilities handling.
>>  - Move to a separate file.
>>  - Fix mask used when writing a 4 bytes value.]
>>
>> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
>> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> 
> Not even a description why this is needed?
> 
> This patch (together with patch 8) seems like the most controversial
> one of the series, so you should better provide a really good reason
> why we would emulate something in software rather than using whatever
> hardware is there.
At least on Tegra, there is no HW that exposes PCI configuration
registers for the host bridge itself. Only the root ports have exposed
PCI configuration registers. There was some debate re: whether a host
bridge device needed to exist or not. This patch makes such a device
exist if it's required.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 22:03     ` Stephen Warren
@ 2013-01-28 22:09       ` Jason Gunthorpe
  2013-01-28 22:18         ` Thomas Petazzoni
  2013-01-29  2:40         ` Bjorn Helgaas
  0 siblings, 2 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-28 22:09 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 03:03:55PM -0700, Stephen Warren wrote:
> On 01/28/2013 01:18 PM, Arnd Bergmann wrote:
> > On Monday 28 January 2013, Thomas Petazzoni wrote:
> >> From: Thierry Reding <thierry.reding@avionic-design.de>
> >>
> >> [Thomas Petazzoni:
> >>  - Simplify capabilities handling.
> >>  - Move to a separate file.
> >>  - Fix mask used when writing a 4 bytes value.]
> >>
> >> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> >> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> > 
> > Not even a description why this is needed?
> > 
> > This patch (together with patch 8) seems like the most controversial
> > one of the series, so you should better provide a really good reason
> > why we would emulate something in software rather than using whatever
> > hardware is there.
> 
> At least on Tegra, there is no HW that exposes PCI configuration
> registers for the host bridge itself. Only the root ports have exposed
> PCI configuration registers. There was some debate re: whether a host
> bridge device needed to exist or not. This patch makes such a device
> exist if it's required.
If Linux will discover properly (I strongly suspect it does) without
the host bridge, then I would say to ditch this...
The PCI-E standard requires a host bridge device, but if Linux doesn't
require it then there is no reason to emulate one.
That would simplify the question of PCI IDs - for Marvell's case and
the sw root port bridge we can just copy the IDs from the bogus config
space of the HW.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 22:09       ` Jason Gunthorpe
@ 2013-01-28 22:18         ` Thomas Petazzoni
  2013-01-28 22:23           ` Jason Gunthorpe
  2013-01-29  2:40         ` Bjorn Helgaas
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 22:18 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
On Mon, 28 Jan 2013 15:09:04 -0700, Jason Gunthorpe wrote:
> If Linux will discover properly (I strongly suspect it does) without
> the host bridge, then I would say to ditch this...
> 
> The PCI-E standard requires a host bridge device, but if Linux doesn't
> require it then there is no reason to emulate one.
> 
> That would simplify the question of PCI IDs - for Marvell's case and
> the sw root port bridge we can just copy the IDs from the bogus config
> space of the HW.
Not sure what you mean in this last paragraph. In this second version,
I really rely on the emulated PCI-to-PCI bridges for the resource
allocation. I give the Linux PCI core a global range of addresses for
memory regions and a global range of addresses for I/O regions, and
then I let Linux do the allocation of ranges on a per bridge basis,
depending on the devices detected downstream. And at the end, I use
those allocated ranges to set up the address decoding windows.
This all comes from your suggestions during the review of the first
revision of this patch set.
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 22:18         ` Thomas Petazzoni
@ 2013-01-28 22:23           ` Jason Gunthorpe
  2013-01-28 22:30             ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-28 22:23 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 11:18:29PM +0100, Thomas Petazzoni wrote:
> > That would simplify the question of PCI IDs - for Marvell's case and
> > the sw root port bridge we can just copy the IDs from the bogus config
> > space of the HW.
> 
> Not sure what you mean in this last paragraph. In this second version,
> I really rely on the emulated PCI-to-PCI bridges for the resource
I'm referring to your earlier question about what PCI IDs to use for
the SW emulated devices. If there is no need for the host bridge then
you only need 1 PCI ID (for the root port bridge) and you can probably
fairly safely re-use the one in the Marvell config space of the HW.
> allocation. I give the Linux PCI core a global range of addresses for
> memory regions and a global range of addresses for I/O regions, and
> then I let Linux do the allocation of ranges on a per bridge basis,
> depending on the devices detected downstream. And at the end, I use
> those allocated ranges to set up the address decoding windows.
Yes, that all seems OK to me.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 22:23           ` Jason Gunthorpe
@ 2013-01-28 22:30             ` Thomas Petazzoni
  2013-01-28 22:51               ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 22:30 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
On Mon, 28 Jan 2013 15:23:48 -0700, Jason Gunthorpe wrote:
> I'm refering to your earlier question about what PCI IDs to use for
> the SW emulated devices. If there is no need for the host bridge then
> you only need 1 PCI ID (for the root port bridge) and you can probably
> fairly safely re-use the one in the Marvell config space of the HW.
Ah, ok, I see. But isn't a host bridge needed to bind all the
PCI-to-PCI bridges under a single bus, in order to get the global
resource assignment I was referring to?
Regarding the PCI IDs, I have started to work with Marvell to see what
is possible. I, unfortunately, haven't received the answer for now.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 22:30             ` Thomas Petazzoni
@ 2013-01-28 22:51               ` Jason Gunthorpe
  2013-01-29 10:01                 ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-28 22:51 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 11:30:48PM +0100, Thomas Petazzoni wrote:
> Dear Jason Gunthorpe,
> 
> On Mon, 28 Jan 2013 15:23:48 -0700, Jason Gunthorpe wrote:
> 
> > I'm refering to your earlier question about what PCI IDs to use for
> > the SW emulated devices. If there is no need for the host bridge then
> > you only need 1 PCI ID (for the root port bridge) and you can probably
> > fairly safely re-use the one in the Marvell config space of the HW.
> 
> Ah, ok, I see. But isn't a host bridge needed to bind all the
> PCI-to-PCI bridges under a single bus, in order to get the global
> resource assignment I was referring to?
The PCI-E spec requires it, but AFAIK it doesn't actually *do*
anything on Linux, and Linux doesn't require it.
I thought Thierry did this experiment and decided it wasn't necessary:
> The reason is that with the latest bindings the matching of root
> ports to device tree nodes works as-is and nothing else indicates
> that the emulated host bridge is actually required to make any of
> this work. So in order not to introduce unneeded code I've left it
> out for now. If somebody decides that we actually need this host
> bridge (for standards compliance or whatnot) it could easily be
> added back.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 22:51               ` Jason Gunthorpe
@ 2013-01-29 10:01                 ` Thomas Petazzoni
  2013-01-29 17:42                   ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 10:01 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
On Mon, 28 Jan 2013 15:51:05 -0700, Jason Gunthorpe wrote:
> > > I'm refering to your earlier question about what PCI IDs to use for
> > > the SW emulated devices. If there is no need for the host bridge then
> > > you only need 1 PCI ID (for the root port bridge) and you can probably
> > > fairly safely re-use the one in the Marvell config space of the HW.
> > 
> > Ah, ok, I see. But isn't a host bridge needed to bind all the
> > PCI-to-PCI bridges under a single bus, in order to get the global
> > resource assignment I was referring to?
> 
> The PCI-E spec requires it, but AFAIK it doesn't actually *do*
> anything on Linux, and Linux doesn't require it.
> 
> I thought Thierry did this experiment and decided it wasn't necessary:
Could you detail what would be visible PCI bus topology if I remove the
emulated PCI host bridge? (And keeping one PCI-to-PCI bridge per PCIe
interface) ?
I'm just trying to understand what it would look like, in terms of
"lspci -t" output, because for now, it's not clear to me how everything
would fit together with the emulated host bridge.
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-29 10:01                 ` Thomas Petazzoni
@ 2013-01-29 17:42                   ` Jason Gunthorpe
  2013-01-29 17:43                     ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-29 17:42 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 11:01:19AM +0100, Thomas Petazzoni wrote:
> Dear Jason Gunthorpe,
> 
> On Mon, 28 Jan 2013 15:51:05 -0700, Jason Gunthorpe wrote:
> 
> > > > I'm refering to your earlier question about what PCI IDs to use for
> > > > the SW emulated devices. If there is no need for the host bridge then
> > > > you only need 1 PCI ID (for the root port bridge) and you can probably
> > > > fairly safely re-use the one in the Marvell config space of the HW.
> > > 
> > > Ah, ok, I see. But isn't a host bridge needed to bind all the
> > > PCI-to-PCI bridges under a single bus, in order to get the global
> > > resource assignment I was referring to?
> > 
> > The PCI-E spec requires it, but AFAIK it doesn't actually *do*
> > anything on Linux, and Linux doesn't require it.
> > 
> > I thought Thierry did this experiment and decided it wasn't necessary:
> 
> Could you detail what would be visible PCI bus topology if I remove the
> emulated PCI host bridge? (And keeping one PCI-to-PCI bridge per PCIe
> interface) ?
So if this is what you have now...
-[0000:00]-+-00.0 <- Host bridge
           +-10.0 <- Bridge
           +-11.0 <- Bridge
           +-12.0 <- Bridge
Then removing the config space at 00.0 (ie the host bridge) will give
you:
-[0000:00]-+-10.0 <- Bridge
           +-11.0 <- Bridge
           +-12.0 <- Bridge
The 'host bridge' isn't a bridge in the PCI-PCI sense, it is just a
normal device at bus 0, device 0, function 0.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-29 17:42                   ` Jason Gunthorpe
@ 2013-01-29 17:43                     ` Thomas Petazzoni
  0 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 17:43 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
On Tue, 29 Jan 2013 10:42:05 -0700, Jason Gunthorpe wrote:
> So if this is what you have now...
> 
> -[0000:00]-+-00.0 <- Host bridge
>            +-10.0 <- Bridge
>            +-11.0 <- Bridge
>            +-12.0 <- Bridge
> 
> Then removing the config space at 00.0 (ie the host bridge) will give
> you:
> 
> -[0000:00]-+-10.0 <- Bridge
>            +-11.0 <- Bridge
>            +-12.0 <- Bridge
> 
> The 'host bridge' isn't a bridge in the PCI-PCI sense, it is just a
> normal device at bus 0, device 0, function 0.
Ok, thanks I'll try this!
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
 
 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 22:09       ` Jason Gunthorpe
  2013-01-28 22:18         ` Thomas Petazzoni
@ 2013-01-29  2:40         ` Bjorn Helgaas
  2013-01-29  6:16           ` Jason Gunthorpe
  1 sibling, 1 reply; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-29  2:40 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 3:09 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Mon, Jan 28, 2013 at 03:03:55PM -0700, Stephen Warren wrote:
>> On 01/28/2013 01:18 PM, Arnd Bergmann wrote:
>> > On Monday 28 January 2013, Thomas Petazzoni wrote:
>> >> From: Thierry Reding <thierry.reding@avionic-design.de>
>> >>
>> >> [Thomas Petazzoni:
>> >>  - Simplify capabilities handling.
>> >>  - Move to a separate file.
>> >>  - Fix mask used when writing a 4 bytes value.]
>> >>
>> >> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
>> >> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
>> >
>> > Not even a description why this is needed?
>> >
>> > This patch (together with patch 8) seems like the most controversial
>> > one of the series, so you should better provide a really good reason
>> > why we would emulate something in software rather than using whatever
>> > hardware is there.
>>
>> At least on Tegra, there is no HW that exposes PCI configuration
>> registers for the host bridge itself. Only the root ports have exposed
>> PCI configuration registers. There was some debate re: whether a host
>> bridge device needed to exist or not. This patch makes such a device
>> exist if it's required.
Host bridges are not actually PCI devices on any architecture.  The
upstream side of a host bridge is by definition not on a PCI bus.  On
some architectures, it *looks* like the host bridge is a PCI device
because it responds to PCI config accesses and you can get to
configuration registers that way.  But it isn't really; you can't
enumerate host bridges by using normal PCI device enumeration because
you have to somehow discover the root bus and the method of doing
config accesses to it.  That is all outside the scope of PCI.  Even on
the architectures where host bridges appear in PCI config space, the
only reason that works is because we assume a config access mechanism
that works for domain 0.  We can't discover bridges in other domains
without help.
> If Linux will discover properly (I strongly suspect it does) without
> the host bridge, then I would say to ditch this...
>
> The PCI-E standard requires a host bridge device, but if Linux doesn't
> require it then there is no reason to emulate one.
I agree that you don't need to emulate anything in the sense of making
config space accessors as this patch does.
However, I think you *should* use pci_scan_root_bus() (maybe you do
already; I haven't read all these patches), which requires that you
know the configuration of the host bridge, i.e., the config access
mechanism, the bus number range below the host bridge, and the I/O and
MMIO apertures through the bridge.  The PCI core builds a logical host
bridge structure internally from that information, and that's all
Linux really needs.
Bjorn
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-29  2:40         ` Bjorn Helgaas
@ 2013-01-29  6:16           ` Jason Gunthorpe
  0 siblings, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-29  6:16 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 07:40:16PM -0700, Bjorn Helgaas wrote:
> On Mon, Jan 28, 2013 at 3:09 PM, Jason Gunthorpe
> <jgunthorpe@obsidianresearch.com> wrote:
> > On Mon, Jan 28, 2013 at 03:03:55PM -0700, Stephen Warren wrote:
> >> On 01/28/2013 01:18 PM, Arnd Bergmann wrote:
> >> > On Monday 28 January 2013, Thomas Petazzoni wrote:
> >> >> From: Thierry Reding <thierry.reding@avionic-design.de>
> >> >>
> >> >> [Thomas Petazzoni:
> >> >>  - Simplify capabilities handling.
> >> >>  - Move to a separate file.
> >> >>  - Fix mask used when writing a 4 bytes value.]
> >> >>
> >> >> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> >> >> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> >> >
> >> > Not even a description why this is needed?
> >> >
> >> > This patch (together with patch 8) seems like the most controversial
> >> > one of the series, so you should better provide a really good reason
> >> > why we would emulate something in software rather than using whatever
> >> > hardware is there.
> >>
> >> At least on Tegra, there is no HW that exposes PCI configuration
> >> registers for the host bridge itself. Only the root ports have exposed
> >> PCI configuration registers. There was some debate re: whether a host
> >> bridge device needed to exist or not. This patch makes such a device
> >> exist if it's required.
> 
> Host bridges are not actually PCI devices on any architecture.  The
> upstream side of a host bridge is by definition not on a PCI bus.  On
> some architectures, it *looks* like the host bridge is a PCI device
> because it responds to PCI config accesses and you can get to
Sure, you can't discover domains through any standard means, but once
you have found a domain (notably a way to issue config transactions)
then the PCI-E standard actually does place requirements on what
config transactions should return:
 - 0:00.0 is a host bridge config space.
 - 0:XX.X will be one of:
   - A root complex internal function, with some restrictions this
     is basically a PCI end device
   - A PCI-PCI bridge with various mandatory capability headers.
     One of these must show up for every physical PCI-E link
     on the root complex.
This collection of stuff on bus 0 is called the 'root complex'. This
is new in PCI-E, PCI-X and PCI didn't have such requirements.
SOC vendors are taking various liberties with their PCI-E implementations.
 - nvidia followed the standard but did not include the host bridge
   at 0:00.0
 - Marvell ignored everything about the root complex config space
   behavior :)
There are two patch sets in this subject, one for nvidia tegra and one
for Marvell, both presenting to Linux a view of the HW that matches
what the PCI-E spec describes - specifically that there is one domain,
and each PCI-E link/controller shows up as a PCI-PCI bridge on bus 0.
In this model, there is no 'host bridge aperture' hardware, each PCI-E
link has a dedicated aperture and control of that aperture is through
the PCI-PCI bridge window registers, again as PCI-E specifies.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread
 
 
 
- * [PATCH v2 07/27] PCI: Add software-emulated host bridge
  2013-01-28 20:18   ` Arnd Bergmann
  2013-01-28 22:03     ` Stephen Warren
@ 2013-01-28 22:09     ` Thomas Petazzoni
  1 sibling, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 22:09 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Arnd Bergmann,
On Mon, 28 Jan 2013 20:18:17 +0000, Arnd Bergmann wrote:
> Not even a description why this is needed?
> 
> This patch (together with patch 8) seems like the most controversial
> one of the series, so you should better provide a really good reason
> why we would emulate something in software rather than using whatever
> hardware is there.
Hum, you're right. In fact, the very reason why I'm adding an emulated
host bridge and emulated PCI-to-PCI bridges is simply because this was
one of the main suggestions raised during the review of the first
revision of this patch set.
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
- * [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (6 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 07/27] PCI: Add software-emulated host bridge Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 19:35   ` Jason Gunthorpe
  2013-01-29 22:35   ` Bjorn Helgaas
  2013-01-28 18:56 ` [PATCH v2 09/27] pci: infrastructure to add drivers in drivers/pci/host Thomas Petazzoni
                   ` (18 subsequent siblings)
  26 siblings, 2 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/pci/Kconfig             |    3 +
 drivers/pci/Makefile            |    1 +
 drivers/pci/sw-pci-pci-bridge.c |  185 +++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h             |   43 +++++++++
 4 files changed, 232 insertions(+)
 create mode 100644 drivers/pci/sw-pci-pci-bridge.c
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index f7548e2..6ed3db1 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -122,3 +122,6 @@ config PCI_LABEL
 
 config PCI_SW_HOST_BRIDGE
 	bool
+
+config PCI_SW_PCI_PCI_BRIDGE
+	bool
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 44ce914..5b48961 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_PCI_IOAPIC) += ioapic.o
 
 # Emulated PCI elements
 obj-$(CONFIG_PCI_SW_HOST_BRIDGE) += sw-host-bridge.o
+obj-$(CONFIG_PCI_SW_PCI_PCI_BRIDGE) += sw-pci-pci-bridge.o
 
 # Build the PCI Hotplug drivers if we were asked to
 obj-$(CONFIG_HOTPLUG_PCI) += hotplug/
diff --git a/drivers/pci/sw-pci-pci-bridge.c b/drivers/pci/sw-pci-pci-bridge.c
new file mode 100644
index 0000000..25679cc
--- /dev/null
+++ b/drivers/pci/sw-pci-pci-bridge.c
@@ -0,0 +1,185 @@
+/*
+ * Implementation of a simple emulated PCI-to-PCI bridge.
+ *
+ * Thierry Reding <thierry.reding@avionic-design.de>
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+
+int pci_sw_pci_bridge_init(struct pci_sw_pci_bridge *bridge)
+{
+	if (!bridge)
+		return -EINVAL;
+
+	memset(bridge, 0, sizeof(struct pci_sw_pci_bridge));
+
+	bridge->status = PCI_STATUS_CAP_LIST;
+	bridge->class = PCI_CLASS_BRIDGE_PCI;
+	bridge->header_type = PCI_HEADER_TYPE_BRIDGE;
+	bridge->cache_line_size = 0x10;
+
+	/* We support 32 bits I/O addressing */
+	bridge->iobase = PCI_IO_RANGE_TYPE_32;
+	bridge->iolimit = PCI_IO_RANGE_TYPE_32;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_sw_pci_bridge_init);
+
+int pci_sw_pci_bridge_read(struct pci_sw_pci_bridge *bridge,
+			   unsigned int where, int size, u32 *value)
+{
+	switch (where & ~3) {
+	case PCI_VENDOR_ID:
+		*value = bridge->device << 16 | bridge->vendor;
+		break;
+
+	case PCI_COMMAND:
+		*value = bridge->status << 16 | bridge->command;
+		break;
+
+	case PCI_CLASS_REVISION:
+		*value = bridge->class << 16 | bridge->interface << 8 |
+			 bridge->revision;
+		break;
+
+	case PCI_CACHE_LINE_SIZE:
+		*value = bridge->bist << 24 | bridge->header_type << 16 |
+			 bridge->latency_timer << 8 | bridge->cache_line_size;
+		break;
+
+	case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_1:
+		*value = bridge->bar[((where & ~3) - PCI_BASE_ADDRESS_0) / 4];
+		break;
+
+	case PCI_PRIMARY_BUS:
+		*value = (bridge->secondary_latency_timer << 24 |
+			  bridge->subordinate_bus         << 16 |
+			  bridge->secondary_bus           <<  8 |
+			  bridge->primary_bus);
+		break;
+
+	case PCI_IO_BASE:
+		*value = (bridge->secondary_status << 16 |
+			  bridge->iolimit          <<  8 |
+			  bridge->iobase);
+		break;
+
+	case PCI_MEMORY_BASE:
+		*value = (bridge->memlimit << 16 | bridge->membase);
+		break;
+
+	case PCI_PREF_MEMORY_BASE:
+		*value = (bridge->prefmemlimit << 16 | bridge->prefmembase);
+		break;
+
+	case PCI_PREF_BASE_UPPER32:
+		*value = bridge->prefbaseupper;
+		break;
+
+	case PCI_PREF_LIMIT_UPPER32:
+		*value = bridge->preflimitupper;
+		break;
+
+	case PCI_IO_BASE_UPPER16:
+		*value = (bridge->iolimitupper << 16 | bridge->iobaseupper);
+		break;
+
+	case PCI_ROM_ADDRESS1:
+		*value = 0;
+		break;
+
+	default:
+		*value = 0xffffffff;
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+	}
+
+	if (size == 2)
+		*value = (*value >> (8 * (where & 3))) & 0xffff;
+	else if (size == 1)
+		*value = (*value >> (8 * (where & 3))) & 0xff;
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_sw_pci_bridge_read);
+
+int pci_sw_pci_bridge_write(struct pci_sw_pci_bridge *bridge,
+			    unsigned int where, int size, u32 value)
+{
+	u32 mask, reg;
+	int err;
+
+	if (size == 4)
+		mask = 0x0;
+	else if (size == 2)
+		mask = ~(0xffff << ((where & 3) * 8));
+	else if (size == 1)
+		mask = ~(0xff << ((where & 3) * 8));
+	else
+		return PCIBIOS_BAD_REGISTER_NUMBER;
+
+	err = pci_sw_pci_bridge_read(bridge, where & ~3, 4, &reg);
+	if (err)
+		return err;
+
+	value = (reg & mask) | value << ((where & 3) * 8);
+
+	switch (where & ~3) {
+	case PCI_COMMAND:
+		bridge->command = value & 0xffff;
+		bridge->status = value >> 16;
+		break;
+
+	case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_1:
+		bridge->bar[((where & ~3) - PCI_BASE_ADDRESS_0) / 4] = value;
+		break;
+
+	case PCI_IO_BASE:
+		/*
+		 * We also keep bit 1 set, it is a read-only bit that
+		 * indicates we support 32 bits addressing for the
+		 * I/O
+		 */
+		bridge->iobase = (value & 0xff) | PCI_IO_RANGE_TYPE_32;
+		bridge->iolimit = ((value >> 8) & 0xff) | PCI_IO_RANGE_TYPE_32;
+		bridge->secondary_status = value >> 16;
+		break;
+
+	case PCI_MEMORY_BASE:
+		bridge->membase = value & 0xffff;
+		bridge->memlimit = value >> 16;
+		break;
+
+	case PCI_PREF_MEMORY_BASE:
+		bridge->prefmembase = value & 0xffff;
+		bridge->prefmemlimit = value >> 16;
+		break;
+
+	case PCI_PREF_BASE_UPPER32:
+		bridge->prefbaseupper = value;
+		break;
+
+	case PCI_PREF_LIMIT_UPPER32:
+		bridge->preflimitupper = value;
+		break;
+
+	case PCI_IO_BASE_UPPER16:
+		bridge->iobaseupper = value & 0xffff;
+		bridge->iolimitupper = value >> 16;
+		break;
+
+	default:
+		break;
+	}
+
+	return PCIBIOS_SUCCESSFUL;
+}
+EXPORT_SYMBOL_GPL(pci_sw_pci_bridge_write);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c93e258..b83b4c8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1864,4 +1864,47 @@ extern int pci_sw_host_bridge_read(struct pci_sw_host_bridge *bridge,
 extern int pci_sw_host_bridge_write(struct pci_sw_host_bridge *bridge,
 				    unsigned int where, int size, u32 value);
 
+struct pci_sw_pci_bridge {
+	u16 vendor;
+	u16 device;
+	u16 command;
+	u16 status;
+	u16 class;
+	u8 interface;
+	u8 revision;
+	u8 bist;
+	u8 header_type;
+	u8 latency_timer;
+	u8 cache_line_size;
+	u32 bar[2];
+	u8 primary_bus;
+	u8 secondary_bus;
+	u8 subordinate_bus;
+	u8 secondary_latency_timer;
+	u8 iobase;
+	u8 iolimit;
+	u16 secondary_status;
+	u16 membase;
+	u16 memlimit;
+	u16 prefmembase;
+	u16 prefmemlimit;
+	u32 prefbaseupper;
+	u32 preflimitupper;
+	u16 iobaseupper;
+	u16 iolimitupper;
+	u8 cappointer;
+	u8 reserved1;
+	u16 reserved2;
+	u32 romaddr;
+	u8 intline;
+	u8 intpin;
+	u16 bridgectrl;
+};
+
+extern int pci_sw_pci_bridge_init(struct pci_sw_pci_bridge *bridge);
+extern int pci_sw_pci_bridge_read(struct pci_sw_pci_bridge *bridge,
+				  unsigned int where, int size, u32 *value);
+extern int pci_sw_pci_bridge_write(struct pci_sw_pci_bridge *bridge,
+				   unsigned int where, int size, u32 value);
+
 #endif /* LINUX_PCI_H */
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge
  2013-01-28 18:56 ` [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge Thomas Petazzoni
@ 2013-01-28 19:35   ` Jason Gunthorpe
  2013-01-28 19:39     ` Thomas Petazzoni
  2013-01-29 22:35   ` Bjorn Helgaas
  1 sibling, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-28 19:35 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 07:56:17PM +0100, Thomas Petazzoni wrote:
> +int pci_sw_pci_bridge_read(struct pci_sw_pci_bridge *bridge,
> +			   unsigned int where, int size, u32 *value)
> +{
> +	switch (where & ~3) {
It is not essential, but desirable, to report an Express Root Port
capability for PCI-E bridges:
        Capabilities: [40] Express (v2) Root Port (Slot+), MSI 00
                DevCap: MaxPayload 128 bytes, PhantFunc 0, Latency L0s <64ns, L1 <1us
                        ExtTag- RBE+ FLReset-
                DevCtl: Report errors: Correctable- Non-Fatal- Fatal- Unsupported-
                        RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop-
                        MaxPayload 128 bytes, MaxReadReq 128 bytes
                DevSta: CorrErr- UncorrErr- FatalErr- UnsuppReq- AuxPwr+ TransPend-
                LnkCap: Port #1, Speed 5GT/s, Width x4, ASPM L0s L1, Latency L0 <1us, L1 <4us
                        ClockPM- Surprise- LLActRep+ BwNot-
                LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- Retrain- CommClk-
                        ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
                LnkSta: Speed 2.5GT/s, Width x0, TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
                SltCap: AttnBtn- PwrCtrl- MRL- AttnInd- PwrInd- HotPlug- Surprise-
                        Slot #0, PowerLimit 25.000W; Interlock- NoCompl+
                SltCtl: Enable: AttnBtn- PwrFlt- MRL- PresDet- CmdCplt- HPIrq- LinkChg-
                        Control: AttnInd Unknown, PwrInd Unknown, Power- Interlock-
                SltSta: Status: AttnBtn- PowerFlt- MRL- CmdCplt- PresDet- Interlock-
                        Changed: MRL- PresDet- LinkState-
                RootCtl: ErrCorrectable- ErrNon-Fatal- ErrFatal- PMEIntEna+ CRSVisible-
                RootCap: CRSVisible-
                RootSta: PME ReqID 0000, PMEStatus- PMEPending-
                DevCap2: Completion Timeout: Range BC, TimeoutDis+ ARIFwd-
                DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis- ARIFwd-
                LnkCtl2: Target Link Speed: 2.5GT/s, EnterCompliance- SpeedDis-, Selectable De-emphasis: -6dB
                         Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
                         Compliance De-emphasis: -6dB
                LnkSta2: Current De-emphasis Level: -3.5dB
In the Marvell case, this capability can be constructed by pulling
data from the Express End Point capability of the PCI-E port:
        Capabilities: [60] Express (v2) Endpoint, MSI 00
                DevCap: MaxPayload 256 bytes, PhantFunc 0, Latency L0s <64ns, L1 unlimited
                        ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset-
                DevCtl: Report errors: Correctable- Non-Fatal+ Fatal+ Unsupported-
                        RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop-
                        MaxPayload 128 bytes, MaxReadReq 512 bytes
                DevSta: CorrErr- UncorrErr- FatalErr- UnsuppReq- AuxPwr- TransPend-
                LnkCap: Port #8, Speed 2.5GT/s, Width x8, ASPM L0s, Latency L0 unlimited, L1 unlimited
                        ClockPM- Surprise- LLActRep- BwNot-
                LnkCtl: ASPM Disabled; RCB 64 bytes Disabled- Retrain- CommClk-
                        ExtSynch- ClockPM- AutWidDis- BWInt- AutBWInt-
                LnkSta: Speed 2.5GT/s, Width x8, TrErr- Train- SlotClk- DLActive- BWMgmt- ABWMgmt-
                DevCap2: Completion Timeout: Range ABCD, TimeoutDis+
                DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-
                LnkCtl2: Target Link Speed: 2.5GT/s, EnterCompliance- SpeedDis-, Selectable De-emphasis: -6dB
                         Transmit Margin: Normal Operating Range, EnterModifiedCompliance- ComplianceSOS-
                         Compliance De-emphasis: -6dB
                LnkSta2: Current De-emphasis Level: -6dB
This lets user space see the width/speed/etc state of the PCI-E link
itself...
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge
  2013-01-28 19:35   ` Jason Gunthorpe
@ 2013-01-28 19:39     ` Thomas Petazzoni
  2013-01-28 19:55       ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 19:39 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
Thanks a lot for your quick feedback!
On Mon, 28 Jan 2013 12:35:16 -0700, Jason Gunthorpe wrote:
> It is not essential, but desirable, to report an Express Root Port
> capability for PCI-E bridges:
[...]
> In the Marvell case, this capability can be constructed by pulling
> data from the the Express End Point capability of the PCI-E port:
I am not sure what you mean by "pulling". Do you mean that I should get
informations from the real PCIe interface, from within the emulated
PCI-to-PCI bridge implementation? This would unfortunately not be
really nice, because until now, the PCI-to-PCI bridge emulation is
clearly separated from the Marvell PCIe driver itself. Of course, it
could register a hook or something like that, so that the emulated
PCI-to-PCI bridge could potentially call back into the Marvell PCIe
driver.
I'll have to dig a little bit more about this capability to see how it
works exactly.
Thanks again for the feedback,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge
  2013-01-28 19:39     ` Thomas Petazzoni
@ 2013-01-28 19:55       ` Jason Gunthorpe
  2013-01-28 22:06         ` Stephen Warren
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-28 19:55 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 08:39:47PM +0100, Thomas Petazzoni wrote:
> > In the Marvell case, this capability can be constructed by pulling
> > data from the the Express End Point capability of the PCI-E port:
> 
> I am not sure what you mean by "pulling". Do you mean that I should get
> informations from the real PCIe interface, from within the emulated
> PCI-to-PCI bridge implementation? This would unfortunately not be
> really nice, because until now, the PCI-to-PCI bridge emulation is
> clearly separated from the Marvell PCIe driver itself. Of course, it
> could register a hook or something like that, so that the emulated
> PCI-to-PCI bridge could potentially call back into the Marvell PCIe
> driver.
Yes, a callback would be needed to the main driver and IIRC the driver
can read/write the end port link info config registers via MMIO. They
probably need a bit of massaging to be in root port format, but
otherwise it should be straightforward..
> I'll have to dig a little bit more about this capability to see how it
> works exactly.
All ports have registers to report and control the link, but the root
port and end port versions are a bit different, so the goal is to read
the end port formatted registers and map them into the root port
format so that userspace can properly see the link state and
configuration.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge
  2013-01-28 19:55       ` Jason Gunthorpe
@ 2013-01-28 22:06         ` Stephen Warren
  2013-01-28 22:16           ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Stephen Warren @ 2013-01-28 22:06 UTC (permalink / raw)
  To: linux-arm-kernel
On 01/28/2013 12:55 PM, Jason Gunthorpe wrote:
> On Mon, Jan 28, 2013 at 08:39:47PM +0100, Thomas Petazzoni wrote:
> 
>>> In the Marvell case, this capability can be constructed by pulling
>>> data from the the Express End Point capability of the PCI-E port:
>>
>> I am not sure what you mean by "pulling". Do you mean that I should get
>> informations from the real PCIe interface, from within the emulated
>> PCI-to-PCI bridge implementation? This would unfortunately not be
>> really nice, because until now, the PCI-to-PCI bridge emulation is
>> clearly separated from the Marvell PCIe driver itself. Of course, it
>> could register a hook or something like that, so that the emulated
>> PCI-to-PCI bridge could potentially call back into the Marvell PCIe
>> driver.
> 
> Yes, a callback would be needed to the main driver and IIRC the driver
> can read/write the end port link info config regsiters via MMIO. They
> probably need a bit of massaging to be in root port format, but
> otherwise it should be straightforward..
> 
>> I'll have to dig a little bit more about this capability to see how it
>> works exactly.
> 
> All ports have registers to report and control the link, but the root
> port and end port versions are a bit different, so the goal is to read
> the end port formatted registers and map them into the root port
> format so that userspace can properly see the link state and
> configuration.
Isn't the thing being emulated here a host bridge, which "contains" the
PCIe root ports underneath, which in turn "contain" the PCIe devices
underneath? At least on Tegra, there is no host bridge device that
exposes PCIe config registers, but the PCIe root ports do exist and do
expose PCIe config registers...
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge
  2013-01-28 22:06         ` Stephen Warren
@ 2013-01-28 22:16           ` Jason Gunthorpe
  0 siblings, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-28 22:16 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 03:06:32PM -0700, Stephen Warren wrote:
> On 01/28/2013 12:55 PM, Jason Gunthorpe wrote:
> > On Mon, Jan 28, 2013 at 08:39:47PM +0100, Thomas Petazzoni wrote:
> > 
> >>> In the Marvell case, this capability can be constructed by pulling
> >>> data from the the Express End Point capability of the PCI-E port:
> >>
> >> I am not sure what you mean by "pulling". Do you mean that I should get
> >> informations from the real PCIe interface, from within the emulated
> >> PCI-to-PCI bridge implementation? This would unfortunately not be
> >> really nice, because until now, the PCI-to-PCI bridge emulation is
> >> clearly separated from the Marvell PCIe driver itself. Of course, it
> >> could register a hook or something like that, so that the emulated
> >> PCI-to-PCI bridge could potentially call back into the Marvell PCIe
> >> driver.
> > 
> > Yes, a callback would be needed to the main driver and IIRC the driver
> > can read/write the end port link info config regsiters via MMIO. They
> > probably need a bit of massaging to be in root port format, but
> > otherwise it should be straightforward..
> > 
> >> I'll have to dig a little bit more about this capability to see how it
> >> works exactly.
> > 
> > All ports have registers to report and control the link, but the root
> > port and end port versions are a bit different, so the goal is to read
> > the end port formatted registers and map them into the root port
> > format so that userspace can properly see the link state and
> > configuration.
> 
> Isn't the thing being emulated here a host bridge, which "contains" the
> PCIe root ports underneath, which in turn "contain" the PCIe devices
> underneath? At least on Tegra, there is no host bridge device that
> exposes PCIe config registers, but the PCIe root ports do exist and do
> expose PCIe config registers...
Patch #7 creates a SW emulated host bridge, which tegra and marvell
lack in HW.
Patch #8 creates a SW emulated root port bridge, which tegra has
properly in HW, while Marvell doesn't.
Basically, on the Marvell chips, the PCI config space of the PCI
complex is useless when used as a root complex - the config space is
only usable when the device is configured as an end port.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
- * [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge
  2013-01-28 18:56 ` [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge Thomas Petazzoni
  2013-01-28 19:35   ` Jason Gunthorpe
@ 2013-01-29 22:35   ` Bjorn Helgaas
  2013-01-29 23:06     ` Arnd Bergmann
  1 sibling, 1 reply; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-29 22:35 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 11:56 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> ---
>  drivers/pci/Kconfig             |    3 +
>  drivers/pci/Makefile            |    1 +
>  drivers/pci/sw-pci-pci-bridge.c |  185 +++++++++++++++++++++++++++++++++++++++
>  include/linux/pci.h             |   43 +++++++++
>  4 files changed, 232 insertions(+)
>  create mode 100644 drivers/pci/sw-pci-pci-bridge.c
If you need this, it can be done in architecture code, can't it?  It's
true that there's nothing architecture-specific in this patch (other
than the fact that ARM is the only arch that needs it), but I'm not
sure there's anything useful for sharing here.
In fact, it seems like what you're after is not so much an *emulated*
bridge that has no corresponding hardware, as it is a wrapper that
presents a standard PCIe interface to hardware that exists but doesn't
conform to the PCIe spec.  If you really do need to ultimately connect
this pci_sw_pci_bridge to a piece of hardware, that will certainly be
arch-specific.
> diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
> index f7548e2..6ed3db1 100644
> --- a/drivers/pci/Kconfig
> +++ b/drivers/pci/Kconfig
> @@ -122,3 +122,6 @@ config PCI_LABEL
>
>  config PCI_SW_HOST_BRIDGE
>         bool
> +
> +config PCI_SW_PCI_PCI_BRIDGE
> +       bool
> diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
> index 44ce914..5b48961 100644
> --- a/drivers/pci/Makefile
> +++ b/drivers/pci/Makefile
> @@ -17,6 +17,7 @@ obj-$(CONFIG_PCI_IOAPIC) += ioapic.o
>
>  # Emulated PCI elements
>  obj-$(CONFIG_PCI_SW_HOST_BRIDGE) += sw-host-bridge.o
> +obj-$(CONFIG_PCI_SW_PCI_PCI_BRIDGE) += sw-pci-pci-bridge.o
>
>  # Build the PCI Hotplug drivers if we were asked to
>  obj-$(CONFIG_HOTPLUG_PCI) += hotplug/
> diff --git a/drivers/pci/sw-pci-pci-bridge.c b/drivers/pci/sw-pci-pci-bridge.c
> new file mode 100644
> index 0000000..25679cc
> --- /dev/null
> +++ b/drivers/pci/sw-pci-pci-bridge.c
> @@ -0,0 +1,185 @@
> +/*
> + * Implementation of a simple emulated PCI-to-PCI bridge.
> + *
> + * Thierry Reding <thierry.reding@avionic-design.de>
> + * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> + *
> + * This file is licensed under the terms of the GNU General Public
> + * License version 2.  This program is licensed "as is" without any
> + * warranty of any kind, whether express or implied.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/pci.h>
> +#include <linux/module.h>
> +
> +int pci_sw_pci_bridge_init(struct pci_sw_pci_bridge *bridge)
> +{
> +       if (!bridge)
> +               return -EINVAL;
> +
> +       memset(bridge, 0, sizeof(struct pci_sw_pci_bridge));
> +
> +       bridge->status = PCI_STATUS_CAP_LIST;
> +       bridge->class = PCI_CLASS_BRIDGE_PCI;
> +       bridge->header_type = PCI_HEADER_TYPE_BRIDGE;
> +       bridge->cache_line_size = 0x10;
> +
> +       /* We support 32 bits I/O addressing */
> +       bridge->iobase = PCI_IO_RANGE_TYPE_32;
> +       bridge->iolimit = PCI_IO_RANGE_TYPE_32;
> +
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(pci_sw_pci_bridge_init);
> +
> +int pci_sw_pci_bridge_read(struct pci_sw_pci_bridge *bridge,
> +                          unsigned int where, int size, u32 *value)
> +{
> +       switch (where & ~3) {
> +       case PCI_VENDOR_ID:
> +               *value = bridge->device << 16 | bridge->vendor;
> +               break;
> +
> +       case PCI_COMMAND:
> +               *value = bridge->status << 16 | bridge->command;
> +               break;
> +
> +       case PCI_CLASS_REVISION:
> +               *value = bridge->class << 16 | bridge->interface << 8 |
> +                        bridge->revision;
> +               break;
> +
> +       case PCI_CACHE_LINE_SIZE:
> +               *value = bridge->bist << 24 | bridge->header_type << 16 |
> +                        bridge->latency_timer << 8 | bridge->cache_line_size;
> +               break;
> +
> +       case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_1:
> +               *value = bridge->bar[((where & ~3) - PCI_BASE_ADDRESS_0) / 4];
> +               break;
> +
> +       case PCI_PRIMARY_BUS:
> +               *value = (bridge->secondary_latency_timer << 24 |
> +                         bridge->subordinate_bus         << 16 |
> +                         bridge->secondary_bus           <<  8 |
> +                         bridge->primary_bus);
> +               break;
> +
> +       case PCI_IO_BASE:
> +               *value = (bridge->secondary_status << 16 |
> +                         bridge->iolimit          <<  8 |
> +                         bridge->iobase);
> +               break;
> +
> +       case PCI_MEMORY_BASE:
> +               *value = (bridge->memlimit << 16 | bridge->membase);
> +               break;
> +
> +       case PCI_PREF_MEMORY_BASE:
> +               *value = (bridge->prefmemlimit << 16 | bridge->prefmembase);
> +               break;
> +
> +       case PCI_PREF_BASE_UPPER32:
> +               *value = bridge->prefbaseupper;
> +               break;
> +
> +       case PCI_PREF_LIMIT_UPPER32:
> +               *value = bridge->preflimitupper;
> +               break;
> +
> +       case PCI_IO_BASE_UPPER16:
> +               *value = (bridge->iolimitupper << 16 | bridge->iobaseupper);
> +               break;
> +
> +       case PCI_ROM_ADDRESS1:
> +               *value = 0;
> +               break;
> +
> +       default:
> +               *value = 0xffffffff;
> +               return PCIBIOS_BAD_REGISTER_NUMBER;
> +       }
> +
> +       if (size == 2)
> +               *value = (*value >> (8 * (where & 3))) & 0xffff;
> +       else if (size == 1)
> +               *value = (*value >> (8 * (where & 3))) & 0xff;
> +
> +       return PCIBIOS_SUCCESSFUL;
> +}
> +EXPORT_SYMBOL_GPL(pci_sw_pci_bridge_read);
> +
> +int pci_sw_pci_bridge_write(struct pci_sw_pci_bridge *bridge,
> +                           unsigned int where, int size, u32 value)
> +{
> +       u32 mask, reg;
> +       int err;
> +
> +       if (size == 4)
> +               mask = 0x0;
> +       else if (size == 2)
> +               mask = ~(0xffff << ((where & 3) * 8));
> +       else if (size == 1)
> +               mask = ~(0xff << ((where & 3) * 8));
> +       else
> +               return PCIBIOS_BAD_REGISTER_NUMBER;
> +
> +       err = pci_sw_pci_bridge_read(bridge, where & ~3, 4, &reg);
> +       if (err)
> +               return err;
> +
> +       value = (reg & mask) | value << ((where & 3) * 8);
> +
> +       switch (where & ~3) {
> +       case PCI_COMMAND:
> +               bridge->command = value & 0xffff;
> +               bridge->status = value >> 16;
> +               break;
> +
> +       case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_1:
> +               bridge->bar[((where & ~3) - PCI_BASE_ADDRESS_0) / 4] = value;
> +               break;
> +
> +       case PCI_IO_BASE:
> +               /*
> +                * We also keep bit 1 set, it is a read-only bit that
> +                * indicates we support 32 bits addressing for the
> +                * I/O
> +                */
> +               bridge->iobase = (value & 0xff) | PCI_IO_RANGE_TYPE_32;
> +               bridge->iolimit = ((value >> 8) & 0xff) | PCI_IO_RANGE_TYPE_32;
> +               bridge->secondary_status = value >> 16;
> +               break;
> +
> +       case PCI_MEMORY_BASE:
> +               bridge->membase = value & 0xffff;
> +               bridge->memlimit = value >> 16;
> +               break;
> +
> +       case PCI_PREF_MEMORY_BASE:
> +               bridge->prefmembase = value & 0xffff;
> +               bridge->prefmemlimit = value >> 16;
> +               break;
> +
> +       case PCI_PREF_BASE_UPPER32:
> +               bridge->prefbaseupper = value;
> +               break;
> +
> +       case PCI_PREF_LIMIT_UPPER32:
> +               bridge->preflimitupper = value;
> +               break;
> +
> +       case PCI_IO_BASE_UPPER16:
> +               bridge->iobaseupper = value & 0xffff;
> +               bridge->iolimitupper = value >> 16;
> +               break;
> +
> +       default:
> +               break;
> +       }
> +
> +       return PCIBIOS_SUCCESSFUL;
> +}
> +EXPORT_SYMBOL_GPL(pci_sw_pci_bridge_write);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index c93e258..b83b4c8 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -1864,4 +1864,47 @@ extern int pci_sw_host_bridge_read(struct pci_sw_host_bridge *bridge,
>  extern int pci_sw_host_bridge_write(struct pci_sw_host_bridge *bridge,
>                                     unsigned int where, int size, u32 value);
>
> +struct pci_sw_pci_bridge {
> +       u16 vendor;
> +       u16 device;
> +       u16 command;
> +       u16 status;
> +       u16 class;
> +       u8 interface;
> +       u8 revision;
> +       u8 bist;
> +       u8 header_type;
> +       u8 latency_timer;
> +       u8 cache_line_size;
> +       u32 bar[2];
> +       u8 primary_bus;
> +       u8 secondary_bus;
> +       u8 subordinate_bus;
> +       u8 secondary_latency_timer;
> +       u8 iobase;
> +       u8 iolimit;
> +       u16 secondary_status;
> +       u16 membase;
> +       u16 memlimit;
> +       u16 prefmembase;
> +       u16 prefmemlimit;
> +       u32 prefbaseupper;
> +       u32 preflimitupper;
> +       u16 iobaseupper;
> +       u16 iolimitupper;
> +       u8 cappointer;
> +       u8 reserved1;
> +       u16 reserved2;
> +       u32 romaddr;
> +       u8 intline;
> +       u8 intpin;
> +       u16 bridgectrl;
> +};
> +
> +extern int pci_sw_pci_bridge_init(struct pci_sw_pci_bridge *bridge);
> +extern int pci_sw_pci_bridge_read(struct pci_sw_pci_bridge *bridge,
> +                                 unsigned int where, int size, u32 *value);
> +extern int pci_sw_pci_bridge_write(struct pci_sw_pci_bridge *bridge,
> +                                  unsigned int where, int size, u32 value);
> +
>  #endif /* LINUX_PCI_H */
> --
> 1.7.9.5
>
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge
  2013-01-29 22:35   ` Bjorn Helgaas
@ 2013-01-29 23:06     ` Arnd Bergmann
  2013-01-30  4:12       ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-29 23:06 UTC (permalink / raw)
  To: linux-arm-kernel
On Tuesday 29 January 2013, Bjorn Helgaas wrote:
> If you need this, it can be done in architecture code, can't it?  It's
> true that there's nothing architecture-specific in this patch (other
> than the fact that ARM is the only arch that needs it), but I'm not
> sure there's anything useful for sharing here.
Since we're moving the host bridge code to drivers/pci/host now, I think
this code should live in the same place. It's entirely possible that
it will be shared between arch/arm and arch/arm64, although I would
hope that we can do away with the emulated bridge code entirely.
More generally speaking, this is infrastructure code, and for any
piece of infrastructure my rule is
* don't add platform specific infrastructure if it can be done at
  the architecture level
* don't add architecture specific infrastructure if it can be
  written in an architecture independent way
* don't add global infrastructure if you can use or extend the
  existing infrastructure.
> In fact, it seems like what you're after is not so much an emulated
> bridge that has no corresponding hardware, as it is a wrapper that
> presents a standard PCIe interface to hardware that exists but doesn't
> conform to the PCIe spec.  If you really do need to ultimately connect
> this pci_sw_pci_bridge to a piece of hardware, that will certainly be
> arch-specific.
As Jason Gunthorpe suggested, we might not need this at all, if the
Linux PCI code can be convinced not to need a configuration space
for the devices that in case of the Marvell hardware don't provide
one.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge
  2013-01-29 23:06     ` Arnd Bergmann
@ 2013-01-30  4:12       ` Jason Gunthorpe
  0 siblings, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-30  4:12 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 11:06:13PM +0000, Arnd Bergmann wrote:
> On Tuesday 29 January 2013, Bjorn Helgaas wrote:
> > If you need this, it can be done in architecture code, can't it?  It's
> > true that there's nothing architecture-specific in this patch (other
> > than the fact that ARM is the only arch that needs it), but I'm not
> > sure there's anything useful for sharing here.
> 
> Since we're moving the host bridge code to drivers/pci/host now, I think
> this code should live in the same place. It's entirely possible that
> it will be shared between arch/arm and arch/arm64, although I would
> hope that we can do away with the emulated bridge code entirely.
This sounds right to me, this is part of the host bridge driver for
various Marvell SOCs, so these days it should live in the
drivers/pci/host or related, not arch/arm.
 
> > In fact, it seems like what you're after is not so much an emulated
> > bridge that has no corresponding hardware, as it is a wrapper that
> > presents a standard PCIe interface to hardware that exists but doesn't
> > conform to the PCIe spec.  If you really do need to ultimately connect
> > this pci_sw_pci_bridge to a piece of hardware, that will certainly be
> > arch-specific.
> 
> As Jason Gunthorpe suggested, we might not need this at all, if the
> Linux PCI code can be convinced not to need a configuration space
> for the devices that in case of the Marvell hardware don't provide
> one.
To be clear, that isn't what I was talking about.. Just to clarify a
few things in the last couple emails:
The PCI 'host bridge configuration space' software emulation code in
patch #7 is not necessary. Bjorn and Thierry both confirm this.
In several places when Bjorn/Arnd talked about a 'host bridge' this is
referring to (more or less) the PCI host *driver* and its attachment
to the kernel interfaces. Specifically a configuration access
mechanism and the resource ranges to allocate against. It has nothing
to do with the bus 0, device 0, function 0 host bridge config space.
Arnd's suggestion to use multiple domains would be broadly equivalent
to the first iteration of this driver - essentially the driver would
manage one link and there would be multiple instances. This gets us
back to where Thomas started - there is currently no code to do cross
domain resource allocation, and static allocation is not possible with
so many links on the chip.
Bjorn is quite right, the purpose of the PCI-PCI SW layer is to bind
the non-standard registers in the Marvell SOC to the standard PCI-E
config interface so the kernel can control it normally. This corrects
what is, IMHO, a defect in the Marvell hardware.
The alternative is to add some kind of cross-domain resource
allocation (or similar) to the PCI core code - however this would
*only* be required to support hardware broken in the same way as
Marvell, so I feel a bit leery about doing that kind of work before we
know if other chips require this. (early on in the discussion there
was some thought that Tegra might also be similarly broken, but it
turned out to be pretty much fine, with a bit of driver work)
So, I still think using a SW layer to provide a compliant PCI-PCI
bridge configuration space for the Marvell hardware is the best way
forward..
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
- * [PATCH v2 09/27] pci: infrastructure to add drivers in drivers/pci/host
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (7 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 08/27] pci: implement an emulated PCI-to-PCI bridge Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 10/27] arm: mvebu: fix address-cells in mpic DT node Thomas Petazzoni
                   ` (17 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
As agreed by the community, PCI host drivers will now be stored in
drivers/pci/host. This commit adds this directory and the related
Kconfig/Makefile changes to allow new drivers to be added in this
directory.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/pci/Kconfig      |    2 ++
 drivers/pci/Makefile     |    3 +++
 drivers/pci/host/Kconfig |    4 ++++
 3 files changed, 9 insertions(+)
 create mode 100644 drivers/pci/host/Kconfig
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 6ed3db1..45392ab 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -125,3 +125,5 @@ config PCI_SW_HOST_BRIDGE
 
 config PCI_SW_PCI_PCI_BRIDGE
 	bool
+
+source "drivers/pci/host/Kconfig"
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 5b48961..eae4db1 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -71,3 +71,6 @@ obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
 obj-$(CONFIG_OF) += of.o
 
 ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG
+
+# PCI host controller drivers
+obj-y += host/
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
new file mode 100644
index 0000000..cc3a1af
--- /dev/null
+++ b/drivers/pci/host/Kconfig
@@ -0,0 +1,4 @@
+menu "PCI host controller drivers"
+	depends on PCI
+
+endmenu
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 10/27] arm: mvebu: fix address-cells in mpic DT node
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (8 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 09/27] pci: infrastructure to add drivers in drivers/pci/host Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 11/27] clk: mvebu: create parent-child relation for PCIe clocks on Armada 370 Thomas Petazzoni
                   ` (16 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
There is no need to have a #address-cells property in the MPIC Device
Tree node, and more than that, having it confuses the of_irq_map_raw()
logic, which will be used by the Marvell PCIe driver.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/boot/dts/armada-370-xp.dtsi |    1 -
 1 file changed, 1 deletion(-)
diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi
index 4c0abe8..1dcdae9 100644
--- a/arch/arm/boot/dts/armada-370-xp.dtsi
+++ b/arch/arm/boot/dts/armada-370-xp.dtsi
@@ -31,7 +31,6 @@
 	mpic: interrupt-controller@d0020000 {
 	      compatible = "marvell,mpic";
 	      #interrupt-cells = <1>;
-	      #address-cells = <1>;
 	      #size-cells = <1>;
 	      interrupt-controller;
 	};
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 11/27] clk: mvebu: create parent-child relation for PCIe clocks on Armada 370
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (9 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 10/27] arm: mvebu: fix address-cells in mpic DT node Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 22:08   ` Stephen Warren
  2013-01-28 18:56 ` [PATCH v2 12/27] clk: mvebu: add more PCIe clocks for Armada XP Thomas Petazzoni
                   ` (15 subsequent siblings)
  26 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The Armada 370 has two gatable clocks for each PCIe interface, and we
want both of them to be enabled. We therefore make one of the two
clocks a child of the other, as we did for the sataX and sataXlnk
clocks on Armada XP.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/mvebu/clk-gating-ctrl.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/clk/mvebu/clk-gating-ctrl.c b/drivers/clk/mvebu/clk-gating-ctrl.c
index 8fa5408..fd52b5f 100644
--- a/drivers/clk/mvebu/clk-gating-ctrl.c
+++ b/drivers/clk/mvebu/clk-gating-ctrl.c
@@ -119,8 +119,8 @@ static const struct mvebu_soc_descr __initconst armada_370_gating_descr[] = {
 	{ "pex1_en", NULL,  2 },
 	{ "ge1", NULL, 3 },
 	{ "ge0", NULL, 4 },
-	{ "pex0", NULL, 5 },
-	{ "pex1", NULL, 9 },
+	{ "pex0", "pex0_en", 5 },
+	{ "pex1", "pex1_en", 9 },
 	{ "sata0", NULL, 15 },
 	{ "sdio", NULL, 17 },
 	{ "tdm", NULL, 25 },
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 11/27] clk: mvebu: create parent-child relation for PCIe clocks on Armada 370
  2013-01-28 18:56 ` [PATCH v2 11/27] clk: mvebu: create parent-child relation for PCIe clocks on Armada 370 Thomas Petazzoni
@ 2013-01-28 22:08   ` Stephen Warren
  2013-01-28 22:21     ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Stephen Warren @ 2013-01-28 22:08 UTC (permalink / raw)
  To: linux-arm-kernel
On 01/28/2013 11:56 AM, Thomas Petazzoni wrote:
> The Armada 370 has two gatable clocks for each PCIe interface, and we
> want both of them to be enabled. We therefore make one of the two
> clocks a child of the other, as we did for the sataX and sataXlnk
> clocks on Armada XP.
> diff --git a/drivers/clk/mvebu/clk-gating-ctrl.c b/drivers/clk/mvebu/clk-gating-ctrl.c
> @@ -119,8 +119,8 @@ static const struct mvebu_soc_descr __initconst armada_370_gating_descr[] = {
>  	{ "pex1_en", NULL,  2 },
>  	{ "ge1", NULL, 3 },
>  	{ "ge0", NULL, 4 },
> -	{ "pex0", NULL, 5 },
> -	{ "pex1", NULL, 9 },
> +	{ "pex0", "pex0_en", 5 },
> +	{ "pex1", "pex1_en", 9 },
I must admit, I know nothing about struct mvebu_soc_descr, but I'm
having a hard time seeing how that code change makes one of those clocks
a parent of the other, since the pex0 entry doesn't reference anything
"pex1"-related, nor vice-versa. Is more explanation in the commit
message warranted here?
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 11/27] clk: mvebu: create parent-child relation for PCIe clocks on Armada 370
  2013-01-28 22:08   ` Stephen Warren
@ 2013-01-28 22:21     ` Thomas Petazzoni
  2013-01-28 22:27       ` Stephen Warren
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 22:21 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Stephen Warren,
On Mon, 28 Jan 2013 15:08:46 -0700, Stephen Warren wrote:
> I must admit, I know nothing about struct mvebu_soc_descr, but I'm
> having a hard time seeing how that code change makes one of those clock
> a parent of the other, since the pex0 entry doesn't reference anything
> "pex1"-related, nor vice-versa. Is more explanation in the commit
> message warranted here?
See the definition of mvebu_soc_descr:
struct mvebu_soc_descr {
        const char *name;
        const char *parent;
        int bit_idx;
};
It simply registers the pex0 clock with the pex0_en clock as its
parent. Those clocks are normal gatable clocks, registered with
clk_register_gate(). This ensures that whenever the pex0 clock is
enabled, its parent clock pex0_en gets enabled as well. We do the same
for SATA clocks on Armada XP, for example:
static const struct mvebu_soc_descr __initconst
armada_xp_gating_descr[] = { { "audio", NULL, 0 },
[...]
        { "sata0lnk", NULL, 14 },
        { "sata0", "sata0lnk", 15 },
[...]
        { "sata1lnk", NULL, 29 },
        { "sata1", "sata1lnk", 30 },
        { }
};
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 11/27] clk: mvebu: create parent-child relation for PCIe clocks on Armada 370
  2013-01-28 22:21     ` Thomas Petazzoni
@ 2013-01-28 22:27       ` Stephen Warren
  2013-01-28 22:44         ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Stephen Warren @ 2013-01-28 22:27 UTC (permalink / raw)
  To: linux-arm-kernel
On 01/28/2013 03:21 PM, Thomas Petazzoni wrote:
> Dear Stephen Warren,
> 
> On Mon, 28 Jan 2013 15:08:46 -0700, Stephen Warren wrote:
> 
>> I must admit, I know nothing about struct mvebu_soc_descr, but I'm
>> having a hard time seeing how that code change makes one of those clock
>> a parent of the other, since the pex0 entry doesn't reference anything
>> "pex1"-related, nor vice-versa. Is more explanation in the commit
>> message warranted here?
> 
> See the definition of mvebu_soc_descr:
> 
> struct mvebu_soc_descr {
>         const char *name;
>         const char *parent;
>         int bit_idx;
> };
> 
> It simply registers the pex0 clock with the pex0_en clock as its
> parents. Those clocks are normal gatable clocks, registered with
> clk_register_gate(). This ensures that whenever the pex0 clock is
> enabled, its parent clock pex0_en gets enabled as well.
Oh I see; I was confused by the patch description. The two clocks being
made child/parent are the two clocks for a port, and this relationship
is set up for each port; for some reason I thought there was a
requirement to make one port's clock a child of the other port's clock.
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 11/27] clk: mvebu: create parent-child relation for PCIe clocks on Armada 370
  2013-01-28 22:27       ` Stephen Warren
@ 2013-01-28 22:44         ` Thomas Petazzoni
  0 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 22:44 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Stephen Warren,
On Mon, 28 Jan 2013 15:27:19 -0700, Stephen Warren wrote:
> Oh I see; I was confused by the patch description. The two clocks being
> made child/parent are the two clocks for a port, and this relationship
> is set up for each port; for some reason I thought there was a
> requirement to make one port's clock a child of the other port's clock.
Aah, ok, I understand the confusion now. Re-reading my commit log, I'm
not sure where the confusion comes from, but English is not my native
language, so maybe something that sounds clear to me is not clear in
reality. I'll rephrase the commit log to make sure this confusion is
clarified.
Thanks!
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
- * [PATCH v2 12/27] clk: mvebu: add more PCIe clocks for Armada XP
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (10 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 11/27] clk: mvebu: create parent-child relation for PCIe clocks on Armada 370 Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 13/27] arm: plat-orion: introduce WIN_CTRL_ENABLE in address mapping code Thomas Petazzoni
                   ` (14 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The current revision of the datasheet only mentions the gatable clocks
for the PCIe 0.0, 0.1, 0.2 and 0.3 interfaces, and forgot to mention
the ones for the PCIe 1.0, 1.1, 1.2, 1.3, 2.0 and 3.0
interfaces. After confirmation with Marvell engineers, this patch adds
the missing gatable clocks for those PCIe interfaces.
It also changes the name of the previously existing PCIe gatable
clocks, in order to match the naming used in the datasheets.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 drivers/clk/mvebu/clk-gating-ctrl.c |   14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/clk/mvebu/clk-gating-ctrl.c b/drivers/clk/mvebu/clk-gating-ctrl.c
index fd52b5f..24137f8 100644
--- a/drivers/clk/mvebu/clk-gating-ctrl.c
+++ b/drivers/clk/mvebu/clk-gating-ctrl.c
@@ -137,10 +137,14 @@ static const struct mvebu_soc_descr __initconst armada_xp_gating_descr[] = {
 	{ "ge2", NULL,  2 },
 	{ "ge1", NULL, 3 },
 	{ "ge0", NULL, 4 },
-	{ "pex0", NULL, 5 },
-	{ "pex1", NULL, 6 },
-	{ "pex2", NULL, 7 },
-	{ "pex3", NULL, 8 },
+	{ "pex00", NULL, 5 },
+	{ "pex01", NULL, 6 },
+	{ "pex02", NULL, 7 },
+	{ "pex03", NULL, 8 },
+	{ "pex10", NULL, 9 },
+	{ "pex11", NULL, 10 },
+	{ "pex12", NULL, 11 },
+	{ "pex13", NULL, 12 },
 	{ "bp", NULL, 13 },
 	{ "sata0lnk", NULL, 14 },
 	{ "sata0", "sata0lnk", 15 },
@@ -152,6 +156,8 @@ static const struct mvebu_soc_descr __initconst armada_xp_gating_descr[] = {
 	{ "xor0", NULL, 22 },
 	{ "crypto", NULL, 23 },
 	{ "tdm", NULL, 25 },
+	{ "pex20", NULL, 26 },
+	{ "pex30", NULL, 27 },
 	{ "xor1", NULL, 28 },
 	{ "sata1lnk", NULL, 29 },
 	{ "sata1", "sata1lnk", 30 },
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 13/27] arm: plat-orion: introduce WIN_CTRL_ENABLE in address mapping code
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (11 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 12/27] clk: mvebu: add more PCIe clocks for Armada XP Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 14/27] arm: plat-orion: refactor the orion_disable_wins() function Thomas Petazzoni
                   ` (13 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
Instead of hardcoding "1" as being the bit value to enable an address
decoding window, introduce and use a WIN_CTRL_ENABLE definition.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/addr-map.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm/plat-orion/addr-map.c b/arch/arm/plat-orion/addr-map.c
index febe386..4dec3db 100644
--- a/arch/arm/plat-orion/addr-map.c
+++ b/arch/arm/plat-orion/addr-map.c
@@ -38,6 +38,7 @@ EXPORT_SYMBOL_GPL(mv_mbus_dram_info);
  * CPU Address Decode Windows registers
  */
 #define WIN_CTRL_OFF		0x0000
+#define   WIN_CTRL_ENABLE       BIT(0)
 #define WIN_BASE_OFF		0x0004
 #define WIN_REMAP_LO_OFF	0x0008
 #define WIN_REMAP_HI_OFF	0x000c
@@ -79,7 +80,8 @@ void __init orion_setup_cpu_win(const struct orion_addr_map_cfg *cfg,
 	}
 
 	base_high = base & 0xffff0000;
-	ctrl = ((size - 1) & 0xffff0000) | (attr << 8) | (target << 4) | 1;
+	ctrl = ((size - 1) & 0xffff0000) | (attr << 8) | (target << 4) |
+		WIN_CTRL_ENABLE;
 
 	writel(base_high, addr + WIN_BASE_OFF);
 	writel(ctrl, addr + WIN_CTRL_OFF);
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 14/27] arm: plat-orion: refactor the orion_disable_wins() function
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (12 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 13/27] arm: plat-orion: introduce WIN_CTRL_ENABLE in address mapping code Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 15/27] arm: plat-orion: introduce orion_{alloc, free}_cpu_win() functions Thomas Petazzoni
                   ` (12 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
In the address decoding code, the orion_disable_wins() function is
used at boot time to disable all address decoding windows, before
configuring only the ones that are needed. This allows us to make sure
that no configuration is left from the bootloader.
As a preparation for the introduction of address decoding window
allocation/deallocation functions, we refactor this function into an
orion_disable_cpu_win() which disables a single window.
The orion_config_wins() function is changed to call
orion_disable_cpu_win() in a loop, to preserve an identical behavior.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/addr-map.c |   35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)
diff --git a/arch/arm/plat-orion/addr-map.c b/arch/arm/plat-orion/addr-map.c
index 4dec3db..dd98638 100644
--- a/arch/arm/plat-orion/addr-map.c
+++ b/arch/arm/plat-orion/addr-map.c
@@ -95,6 +95,19 @@ void __init orion_setup_cpu_win(const struct orion_addr_map_cfg *cfg,
 	}
 }
 
+static void __init orion_disable_cpu_win(const struct orion_addr_map_cfg *cfg,
+					 const int win)
+{
+	void __iomem *addr = cfg->win_cfg_base(cfg, win);
+
+	writel(0, addr + WIN_BASE_OFF);
+	writel(0, addr + WIN_CTRL_OFF);
+	if (cfg->cpu_win_can_remap(cfg, win)) {
+		writel(0, addr + WIN_REMAP_LO_OFF);
+		writel(0, addr + WIN_REMAP_HI_OFF);
+	}
+}
+
 /*
  * Configure a number of windows.
  */
@@ -108,36 +121,22 @@ static void __init orion_setup_cpu_wins(const struct orion_addr_map_cfg * cfg,
 	}
 }
 
-static void __init orion_disable_wins(const struct orion_addr_map_cfg * cfg)
-{
-	void __iomem *addr;
-	int i;
-
-	for (i = 0; i < cfg->num_wins; i++) {
-		addr = cfg->win_cfg_base(cfg, i);
-
-		writel(0, addr + WIN_BASE_OFF);
-		writel(0, addr + WIN_CTRL_OFF);
-		if (cfg->cpu_win_can_remap(cfg, i)) {
-			writel(0, addr + WIN_REMAP_LO_OFF);
-			writel(0, addr + WIN_REMAP_HI_OFF);
-		}
-	}
-}
-
 /*
  * Disable, clear and configure windows.
  */
 void __init orion_config_wins(struct orion_addr_map_cfg * cfg,
 			      const struct orion_addr_map_info *info)
 {
+	int win;
+
 	if (!cfg->cpu_win_can_remap)
 		cfg->cpu_win_can_remap = orion_cpu_win_can_remap;
 
 	if (!cfg->win_cfg_base)
 		cfg->win_cfg_base = orion_win_cfg_base;
 
-	orion_disable_wins(cfg);
+	for (win = 0; win < cfg->num_wins; win++)
+		orion_disable_cpu_win(cfg, win);
 
 	if (info)
 		orion_setup_cpu_wins(cfg, info);
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 15/27] arm: plat-orion: introduce orion_{alloc, free}_cpu_win() functions
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (13 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 14/27] arm: plat-orion: refactor the orion_disable_wins() function Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 16/27] arm: mvebu: add functions to alloc/free PCIe decoding windows Thomas Petazzoni
                   ` (11 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
In the address decoding code, we implement two new functions:
orion_alloc_cpu_win() and orion_free_cpu_win(). The first function
finds an unused address decoding window, and configures it according
to the given arguments (in terms of base address, size, target,
attributes). The second function frees an address decoding window,
given a physical base address.
Those two new functions will be used by the PCIe code, which needs to
dynamically register address decoding windows depending on the PCIe
devices that are detected.
The orion_free_cpu_win() function is only here to handle error cases
in the PCIe devices initialization; in the normal case, address
decoding windows are never freed.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/addr-map.c              |   50 +++++++++++++++++++++++++++
 arch/arm/plat-orion/include/plat/addr-map.h |    7 ++++
 2 files changed, 57 insertions(+)
diff --git a/arch/arm/plat-orion/addr-map.c b/arch/arm/plat-orion/addr-map.c
index dd98638..8f4ef82 100644
--- a/arch/arm/plat-orion/addr-map.c
+++ b/arch/arm/plat-orion/addr-map.c
@@ -109,6 +109,56 @@ static void __init orion_disable_cpu_win(const struct orion_addr_map_cfg *cfg,
 }
 
 /*
+ * Find an unused address decoding window, and enable it according to
+ * the arguments passed (base, size, target, attributes, remap).
+ */
+int __init orion_alloc_cpu_win(const struct orion_addr_map_cfg *cfg,
+			       const u32 base, const u32 size,
+			       const u8 target, const u8 attr, const int remap)
+{
+	int win;
+
+	for (win = 0; win < cfg->num_wins; win++) {
+		void __iomem *addr = cfg->win_cfg_base(cfg, win);
+		u32 ctrl = readl(addr + WIN_CTRL_OFF);
+		if (!(ctrl & WIN_CTRL_ENABLE))
+			break;
+	}
+
+	/* No more windows available */
+	if (win == cfg->num_wins)
+		return -ENOMEM;
+
+	orion_setup_cpu_win(cfg, win, base, size, target, attr, remap);
+	return 0;
+}
+
+/*
+ * Free an address decoding window, given its base address.
+ */
+int __init orion_free_cpu_win(const struct orion_addr_map_cfg *cfg,
+			      const u32 base)
+{
+	int win;
+
+	for (win = 0; win < cfg->num_wins; win++) {
+		void __iomem *addr = cfg->win_cfg_base(cfg, win);
+		u32 winbase = readl(addr + WIN_BASE_OFF);
+		u32 ctrl = readl(addr + WIN_CTRL_OFF);
+
+		if (!(ctrl & WIN_CTRL_ENABLE))
+			continue;
+
+		if (winbase == (base & 0xffff0000)) {
+			orion_disable_cpu_win(cfg, win);
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+/*
  * Configure a number of windows.
  */
 static void __init orion_setup_cpu_wins(const struct orion_addr_map_cfg * cfg,
diff --git a/arch/arm/plat-orion/include/plat/addr-map.h b/arch/arm/plat-orion/include/plat/addr-map.h
index b76c065..f8bb539 100644
--- a/arch/arm/plat-orion/include/plat/addr-map.h
+++ b/arch/arm/plat-orion/include/plat/addr-map.h
@@ -49,6 +49,13 @@ void __init orion_setup_cpu_win(const struct orion_addr_map_cfg *cfg,
 				const u32 size, const u8 target,
 				const u8 attr, const int remap);
 
+int __init orion_alloc_cpu_win(const struct orion_addr_map_cfg *cfg,
+			       const u32 base, const u32 size,
+			       const u8 target, const u8 attr, const int remap);
+
+int __init orion_free_cpu_win(const struct orion_addr_map_cfg *cfg,
+			      const u32 base);
+
 void __init orion_setup_cpu_mbus_target(const struct orion_addr_map_cfg *cfg,
 					const void __iomem *ddr_window_cpu_base);
 #endif
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 16/27] arm: mvebu: add functions to alloc/free PCIe decoding windows
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (14 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 15/27] arm: plat-orion: introduce orion_{alloc, free}_cpu_win() functions Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 17/27] arm: plat-orion: make common PCIe code usable on mvebu Thomas Petazzoni
                   ` (10 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
This commit adds two functions armada_370_xp_alloc_pcie_window() and
armada_370_xp_free_pcie_window() that respectively allocate and free
an address decoding window pointing to either a memory or I/O region
of a PCIe device.
Those functions will be used by the PCIe driver to create and remove
those regions depending on the PCIe devices that are detected.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/mach-mvebu/addr-map.c              |   87 +++++++++++++++++++++++++--
 arch/arm/mach-mvebu/common.h                |    1 +
 arch/arm/mach-mvebu/include/mach/addr-map.h |    8 +++
 3 files changed, 92 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/mach-mvebu/include/mach/addr-map.h
diff --git a/arch/arm/mach-mvebu/addr-map.c b/arch/arm/mach-mvebu/addr-map.c
index ab9b3bd..7ec8fc6 100644
--- a/arch/arm/mach-mvebu/addr-map.c
+++ b/arch/arm/mach-mvebu/addr-map.c
@@ -24,14 +24,10 @@
 #define ARMADA_XP_TARGET_DEV_BUS	1
 #define   ARMADA_XP_ATTR_DEV_BOOTROM    0x1D
 #define ARMADA_XP_TARGET_ETH1		3
-#define ARMADA_XP_TARGET_PCIE_0_2	4
 #define ARMADA_XP_TARGET_ETH0		7
-#define ARMADA_XP_TARGET_PCIE_1_3	8
 
 #define ARMADA_370_TARGET_DEV_BUS       1
 #define   ARMADA_370_ATTR_DEV_BOOTROM   0x1D
-#define ARMADA_370_TARGET_PCIE_0        4
-#define ARMADA_370_TARGET_PCIE_1        8
 
 #define ARMADA_WINDOW_8_PLUS_OFFSET       0x90
 #define ARMADA_SDRAM_ADDR_DECODING_OFFSET 0x180
@@ -89,6 +85,89 @@ static struct __initdata orion_addr_map_cfg addr_map_cfg = {
 	.win_cfg_base = armada_cfg_base,
 };
 
+#ifdef CONFIG_PCI
+/*
+ * This structure and the following arrays allow mapping a PCIe (port,
+ * lane) tuple to the corresponding (target, attribute) tuple needed
+ * to configure an address decoding window for the given PCIe (port,
+ * lane).
+ */
+struct pcie_mapping {
+	int port;
+	int lane;
+	u8  target;
+	u8  attr;
+};
+
+struct pcie_mapping armada_xp_pcie_mappings[] = {
+	{ .port = 0, .lane = 0, .target = 4, .attr = 0xE0 },
+	{ .port = 0, .lane = 1, .target = 4, .attr = 0xD0 },
+	{ .port = 0, .lane = 2, .target = 4, .attr = 0xB0 },
+	{ .port = 0, .lane = 3, .target = 4, .attr = 0x70 },
+	{ .port = 1, .lane = 0, .target = 8, .attr = 0xE0 },
+	{ .port = 1, .lane = 1, .target = 8, .attr = 0xD0 },
+	{ .port = 1, .lane = 2, .target = 8, .attr = 0xB0 },
+	{ .port = 1, .lane = 3, .target = 8, .attr = 0x70 },
+	{ .port = 2, .lane = 0, .target = 4, .attr = 0xF0 },
+	{ .port = 3, .lane = 0, .target = 8, .attr = 0xF0 },
+	{ .port = -1 },
+};
+
+struct pcie_mapping armada_370_pcie_mappings[] = {
+	{ .port = 0, .lane = 0, .target = 4, .attr = 0xE0 },
+	{ .port = 1, .lane = 0, .target = 8, .attr = 0xE0 },
+	{ .port = -1 },
+};
+
+/*
+ * This function sets up a new address decoding window at the given
+ * base address, pointing to the given PCIe interface (through
+ * pcie_port and pcie_lane).
+ */
+int __init armada_370_xp_alloc_pcie_window(int pcie_port, int pcie_lane,
+					   unsigned long base, u32 size,
+					   int type)
+{
+	struct pcie_mapping *mapping, *mappings;
+	u8 target, attr;
+
+	if (of_machine_is_compatible("marvell,armadaxp"))
+		mappings = armada_xp_pcie_mappings;
+	else if (of_machine_is_compatible("marvell,armada370"))
+		mappings = armada_370_pcie_mappings;
+	else
+		return -ENODEV;
+
+	for (mapping = mappings; mapping->port != -1; mapping++)
+		if (mapping->port == pcie_port && mapping->lane == pcie_lane)
+			break;
+
+	if (mapping->port == -1)
+		return -ENODEV;
+
+	target = mapping->target;
+	attr = mapping->attr;
+
+	/*
+	 * Bit 3 of the attributes indicates that it is a
+	 * memory region, as opposed to an I/O region
+	 */
+	if (type == IORESOURCE_MEM)
+		attr |= (1 << 3);
+
+	return orion_alloc_cpu_win(&addr_map_cfg, base, size, target, attr, -1);
+}
+
+/*
+ * Frees an address decoding window previously set up by
+	 * armada_370_xp_alloc_pcie_window().
+ */
+int __init armada_370_xp_free_pcie_window(unsigned long base)
+{
+	return orion_free_cpu_win(&addr_map_cfg, base);
+}
+#endif
+
 static int __init armada_setup_cpu_mbus(void)
 {
 	struct device_node *np;
diff --git a/arch/arm/mach-mvebu/common.h b/arch/arm/mach-mvebu/common.h
index aa27bc2..77c078c 100644
--- a/arch/arm/mach-mvebu/common.h
+++ b/arch/arm/mach-mvebu/common.h
@@ -25,4 +25,5 @@ int armada_370_xp_coherency_init(void);
 int armada_370_xp_pmsu_init(void);
 void armada_xp_secondary_startup(void);
 extern struct smp_operations armada_xp_smp_ops;
+
 #endif
diff --git a/arch/arm/mach-mvebu/include/mach/addr-map.h b/arch/arm/mach-mvebu/include/mach/addr-map.h
new file mode 100644
index 0000000..0845b27
--- /dev/null
+++ b/arch/arm/mach-mvebu/include/mach/addr-map.h
@@ -0,0 +1,8 @@
+#ifndef MVEBU_ADDR_MAP_H
+#define MVEBU_ADDR_MAP_H
+
+int armada_370_xp_alloc_pcie_window(int pcie_port, int pcie_lane,
+				    unsigned long base, u32 size, int type);
+int armada_370_xp_free_pcie_window(unsigned long base);
+
+#endif /* MVEBU_ADDR_MAP_H */
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 17/27] arm: plat-orion: make common PCIe code usable on mvebu
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (15 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 16/27] arm: mvebu: add functions to alloc/free PCIe decoding windows Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 18/27] arm: plat-orion: add more flexible PCI configuration space read/write functions Thomas Petazzoni
                   ` (9 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
mvebu is a new-style Orion platform, so it only selects PLAT_ORION,
but not PLAT_ORION_LEGACY. It will however need the common PCIe code
from plat-orion, so make this code available for PLAT_ORION platforms
as a whole, and not only PLAT_ORION_LEGACY platforms.
We also take this opportunity to build the PCIe code only when
CONFIG_PCI is enabled.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/Makefile |    3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/arm/plat-orion/Makefile b/arch/arm/plat-orion/Makefile
index a82cecb..1aca22b 100644
--- a/arch/arm/plat-orion/Makefile
+++ b/arch/arm/plat-orion/Makefile
@@ -4,7 +4,8 @@
 ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include
 
 obj-y                             += addr-map.o
+obj-$(CONFIG_PCI)                 += pcie.o
 
 orion-gpio-$(CONFIG_GENERIC_GPIO) += gpio.o
-obj-$(CONFIG_PLAT_ORION_LEGACY)   += irq.o pcie.o time.o common.o mpp.o
+obj-$(CONFIG_PLAT_ORION_LEGACY)   += irq.o time.o common.o mpp.o
 obj-$(CONFIG_PLAT_ORION_LEGACY)   += $(orion-gpio-y)
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 18/27] arm: plat-orion: add more flexible PCI configuration space read/write functions
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (16 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 17/27] arm: plat-orion: make common PCIe code usable on mvebu Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 19:51   ` Jason Gunthorpe
  2013-01-28 18:56 ` [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems Thomas Petazzoni
                   ` (8 subsequent siblings)
  26 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The existing orion_pcie_rd_conf() and orion_pcie_wr_conf() functions
provided by plat-orion/pcie.c are nice to read and write the PCI
configuration space of a device, but they unfortunately assume that
the bus number and slot number at which a device is visible at the
Linux software level is the same as the bus number and slot number at
the hardware level.
However, with the usage of the emulated PCI host bridge and emulated
PCI-to-PCI bridges, this is not the case: bus number 0 is the emulated
bus on which the emulated PCI-to-PCI bridges sit, so from the Linux
point of view, the real busses start at bus 1, but from a hardware
point of view, they start at bus 0.
So, we cannot use the existing orion_pcie_rd_conf() and
orion_pcie_wr_conf() implementations, which take their bus number
directly from a given pci_bus structure. Instead, we add lower-level
variants, orion_pcie_rd_conf_bus() and orion_pcie_wr_conf_bus() that
take a bus number as argument. The existing orion_pcie_rd_conf() and
orion_pcie_wr_conf() functions are implemented on top of the new
*_bus() variants.
Those *_bus() variants will be used by the Marvell Armada 370/XP PCIe
driver.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/plat-orion/include/plat/pcie.h |    4 ++++
 arch/arm/plat-orion/pcie.c              |   26 ++++++++++++++++++++------
 2 files changed, 24 insertions(+), 6 deletions(-)
diff --git a/arch/arm/plat-orion/include/plat/pcie.h b/arch/arm/plat-orion/include/plat/pcie.h
index fe5b9e8..46974c1 100644
--- a/arch/arm/plat-orion/include/plat/pcie.h
+++ b/arch/arm/plat-orion/include/plat/pcie.h
@@ -21,12 +21,16 @@ int orion_pcie_get_local_bus_nr(void __iomem *base);
 void orion_pcie_set_local_bus_nr(void __iomem *base, int nr);
 void orion_pcie_reset(void __iomem *base);
 void orion_pcie_setup(void __iomem *base);
+int orion_pcie_rd_conf_bus(void __iomem *base, u32 busn,
+			   u32 devfn, int where, int size, u32 *val);
 int orion_pcie_rd_conf(void __iomem *base, struct pci_bus *bus,
 		       u32 devfn, int where, int size, u32 *val);
 int orion_pcie_rd_conf_tlp(void __iomem *base, struct pci_bus *bus,
 			   u32 devfn, int where, int size, u32 *val);
 int orion_pcie_rd_conf_wa(void __iomem *wa_base, struct pci_bus *bus,
 			  u32 devfn, int where, int size, u32 *val);
+int orion_pcie_wr_conf_bus(void __iomem *base, u32 busn,
+			   u32 devfn, int where, int size, u32 val);
 int orion_pcie_wr_conf(void __iomem *base, struct pci_bus *bus,
 		       u32 devfn, int where, int size, u32 val);
 
diff --git a/arch/arm/plat-orion/pcie.c b/arch/arm/plat-orion/pcie.c
index f20a321..0e85bdd 100644
--- a/arch/arm/plat-orion/pcie.c
+++ b/arch/arm/plat-orion/pcie.c
@@ -203,10 +203,10 @@ void __init orion_pcie_setup(void __iomem *base)
 	writel(mask, base + PCIE_MASK_OFF);
 }
 
-int orion_pcie_rd_conf(void __iomem *base, struct pci_bus *bus,
-		       u32 devfn, int where, int size, u32 *val)
+int orion_pcie_rd_conf_bus(void __iomem *base, u32 busn, u32 devfn,
+			   int where, int size, u32 *val)
 {
-	writel(PCIE_CONF_BUS(bus->number) |
+	writel(PCIE_CONF_BUS(busn) |
 		PCIE_CONF_DEV(PCI_SLOT(devfn)) |
 		PCIE_CONF_FUNC(PCI_FUNC(devfn)) |
 		PCIE_CONF_REG(where) | PCIE_CONF_ADDR_EN,
@@ -222,6 +222,13 @@ int orion_pcie_rd_conf(void __iomem *base, struct pci_bus *bus,
 	return PCIBIOS_SUCCESSFUL;
 }
 
+int orion_pcie_rd_conf(void __iomem *base, struct pci_bus *bus,
+		       u32 devfn, int where, int size, u32 *val)
+{
+	return orion_pcie_rd_conf_bus(base, bus->number, devfn,
+				      where, size, val);
+}
+
 int orion_pcie_rd_conf_tlp(void __iomem *base, struct pci_bus *bus,
 			   u32 devfn, int where, int size, u32 *val)
 {
@@ -261,12 +268,12 @@ int orion_pcie_rd_conf_wa(void __iomem *wa_base, struct pci_bus *bus,
 	return PCIBIOS_SUCCESSFUL;
 }
 
-int orion_pcie_wr_conf(void __iomem *base, struct pci_bus *bus,
-		       u32 devfn, int where, int size, u32 val)
+int orion_pcie_wr_conf_bus(void __iomem *base, u32 busn,
+			   u32 devfn, int where, int size, u32 val)
 {
 	int ret = PCIBIOS_SUCCESSFUL;
 
-	writel(PCIE_CONF_BUS(bus->number) |
+	writel(PCIE_CONF_BUS(busn) |
 		PCIE_CONF_DEV(PCI_SLOT(devfn)) |
 		PCIE_CONF_FUNC(PCI_FUNC(devfn)) |
 		PCIE_CONF_REG(where) | PCIE_CONF_ADDR_EN,
@@ -284,3 +291,10 @@ int orion_pcie_wr_conf(void __iomem *base, struct pci_bus *bus,
 
 	return ret;
 }
+
+int orion_pcie_wr_conf(void __iomem *base, struct pci_bus *bus,
+		       u32 devfn, int where, int size, u32 val)
+{
+	return orion_pcie_wr_conf_bus(base, bus->number, devfn,
+				      where, size, val);
+}
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 18/27] arm: plat-orion: add more flexible PCI configuration space read/write functions
  2013-01-28 18:56 ` [PATCH v2 18/27] arm: plat-orion: add more flexible PCI configuration space read/write functions Thomas Petazzoni
@ 2013-01-28 19:51   ` Jason Gunthorpe
  2013-01-29  8:40     ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-28 19:51 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 07:56:27PM +0100, Thomas Petazzoni wrote:
> However, with the usage of the emulated PCI host bridge and emulated
> PCI-to-PCI bridges, this is not the case: bus number 0 is the emulated
> bus on which the emulated PCI-to-PCI bridges sit, so from the Linux
> point of view, the real busses start at bus 1, but from a hardware
> point of view, they start at bus 0.
Hum.. This is a bit funny sounding, can you confirm..
The bus number programmed into all the end points must match the Linux
number. Ie the PCI-E Link Description register of end point devices
must report the same bus number as Linux. PCI-E devices learn their
bus number by capturing the bus number from type 0 configuration
transactions.
For the most part config transactions issued to the PCI-E controllers
should be type 0 transactions with a bus number that matches what
Linux is setting.
The only time I think you'd ever see bus number 0 is when accessing
the config space of the Marvell PCI-E controller end port. But, I also
think you can avoid doing these transactions by just accessing the MMIO
versions of those registers..
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 18/27] arm: plat-orion: add more flexible PCI configuration space read/write functions
  2013-01-28 19:51   ` Jason Gunthorpe
@ 2013-01-29  8:40     ` Thomas Petazzoni
  2013-01-29 17:40       ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29  8:40 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
On Mon, 28 Jan 2013 12:51:11 -0700, Jason Gunthorpe wrote:
> On Mon, Jan 28, 2013 at 07:56:27PM +0100, Thomas Petazzoni wrote:
> 
> > However, with the usage of the emulated PCI host bridge and emulated
> > PCI-to-PCI bridges, this is not the case: bus number 0 is the emulated
> > bus on which the emulated PCI-to-PCI bridges sit, so from the Linux
> > point of view, the real busses start at bus 1, but from a hardware
> > point of view, they start at bus 0.
> 
> Hum.. This is a bit funny sounding, can you confirm..
Might be yes, but IIRC, when I try to enumerate the devices in the PCIe
interface 0 (from a hardware point of view), passing a bus number of 1
in the PCI configuration space access registers, then it simply doesn't
work.
> The bus number programmed into all the end points must match the Linux
> number. Ie the PCI-E Link Description register of end point devices
What is this PCI-E Link Description register ? Where is it located ?
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 18/27] arm: plat-orion: add more flexible PCI configuration space read/write functions
  2013-01-29  8:40     ` Thomas Petazzoni
@ 2013-01-29 17:40       ` Jason Gunthorpe
  0 siblings, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-29 17:40 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 09:40:03AM +0100, Thomas Petazzoni wrote:
> Dear Jason Gunthorpe,
> 
> On Mon, 28 Jan 2013 12:51:11 -0700, Jason Gunthorpe wrote:
> > On Mon, Jan 28, 2013 at 07:56:27PM +0100, Thomas Petazzoni wrote:
> > 
> > > However, with the usage of the emulated PCI host bridge and emulated
> > > PCI-to-PCI bridges, this is not the case: bus number 0 is the emulated
> > > bus on which the emulated PCI-to-PCI bridges sit, so from the Linux
> > > point of view, the real busses start at bus 1, but from a hardware
> > > point of view, they start at bus 0.
> > 
> > Hum.. This is a bit funny sounding, can you confirm..
> 
> Might be yes, but IIRC, when I try to enumerate the devices in the PCIe
> interface 0 (from a hardware point of view), passing a bus number of 1
> in the PCI configuration space access registers, then it simply doesn't
> work.
Hurm. The trick is you need the chip to issue a type 0 request. The
Marvell docs say this happens automatically based on the 'internal bus
number'
The only other reference to bus number is in the PCI Express Status
Register (41A04), so that probably needs to be set to the subordinate
bus number of the bridge.
> > The bus number programmed into all the end points must match the Linux
> > number. Ie the PCI-E Link Description register of end point devices
> 
> What is this PCI-E Link Description register ? Where is it located ?
Hum, looks like this is only for root complex links not end devices -
PCI-X had a register for this but it seems to have been removed in
PCI-E.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (17 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 18/27] arm: plat-orion: add more flexible PCI configuration space read/write functions Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 22:21   ` Stephen Warren
                     ` (4 more replies)
  2013-01-28 18:56 ` [PATCH v2 20/27] arm: mvebu: PCIe support is now available on mvebu Thomas Petazzoni
                   ` (7 subsequent siblings)
  26 siblings, 5 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
This driver implements the support for the PCIe interfaces on the
Marvell Armada 370/XP ARM SoCs. In the future, it might be extended to
cover earlier families of Marvell SoCs, such as Dove, Orion and
Kirkwood.
The driver implements the hw_pci operations needed by the core ARM PCI
code to setup PCI devices and get their corresponding IRQs, and the
pci_ops operations that are used by the PCI core to read/write the
configuration space of PCI devices.
Since the PCIe interfaces of Marvell SoCs are completely separate and
not linked together in a bus, this driver sets up an emulated PCI host
bridge, with one PCI-to-PCI bridge as child for each hardware PCIe
interface.
In addition, this driver enumerates the different PCIe slots, and for
those having a device plugged in, it sets up the necessary address
decoding windows, using the new armada_370_xp_alloc_pcie_window()
function from mach-mvebu/addr-map.c.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 .../devicetree/bindings/pci/armada-370-xp-pcie.txt |  175 +++++++
 drivers/pci/host/Kconfig                           |    6 +
 drivers/pci/host/Makefile                          |    4 +
 drivers/pci/host/pci-mvebu.c                       |  500 ++++++++++++++++++++
 4 files changed, 685 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pci/armada-370-xp-pcie.txt
 create mode 100644 drivers/pci/host/Makefile
 create mode 100644 drivers/pci/host/pci-mvebu.c
diff --git a/Documentation/devicetree/bindings/pci/armada-370-xp-pcie.txt b/Documentation/devicetree/bindings/pci/armada-370-xp-pcie.txt
new file mode 100644
index 0000000..9313e92
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/armada-370-xp-pcie.txt
@@ -0,0 +1,175 @@
+* Marvell Armada 370/XP PCIe interfaces
+
+Mandatory properties:
+- compatible: must be "marvell,armada-370-xp-pcie"
+- status: either "disabled" or "okay"
+- #address-cells, set to <3>
+- #size-cells, set to <2>
+- #interrupt-cells, set to <1>
+- bus-range: PCI bus numbers covered
+- ranges: standard PCI-style address ranges, describing the PCIe
+  registers for each PCIe interface, and then ranges for the PCI
+  memory and I/O regions.
+- interrupt-map-mask and interrupt-map are standard PCI Device Tree
+  properties to describe the interrupts associated to each PCI
+  interface.
+
+In addition, the Device Tree node must have sub-nodes describing each
+PCIe interface, having the following mandatory properties:
+- reg: the address and size of the PCIe registers (translated
+  addresses according to the ranges property of the parent)
+- clocks: the clock associated to this PCIe interface
+- marvell,pcie-port: the physical PCIe port number
+- status: either "disabled" or "okay"
+
+and the following optional properties:
+- marvell,pcie-lane: the physical PCIe lane number, for ports having
+  multiple lanes. If this property is not found, we assume that the
+  value is 0.
+
+Example:
+
+pcie-controller {
+	compatible = "marvell,armada-370-xp-pcie";
+	status = "disabled";
+
+	#address-cells = <3>;
+	#size-cells = <2>;
+
+	bus-range = <0x00 0xff>;
+
+	ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
+	          0x00004800 0 0xd0042000 0xd0042000 0 0x00002000   /* port 2.0 registers */
+	          0x00001000 0 0xd0044000 0xd0044000 0 0x00002000   /* port 0.1 registers */
+	          0x00001800 0 0xd0048000 0xd0048000 0 0x00002000   /* port 0.2 registers */
+	          0x00002000 0 0xd004C000 0xd004C000 0 0x00002000   /* port 0.3 registers */
+		  0x00002800 0 0xd0080000 0xd0080000 0 0x00002000   /* port 1.0 registers */
+	          0x00005000 0 0xd0082000 0xd0082000 0 0x00002000   /* port 3.0 registers */
+		  0x00003000 0 0xd0084000 0xd0084000 0 0x00002000   /* port 1.1 registers */
+		  0x00003800 0 0xd0088000 0xd0088000 0 0x00002000   /* port 1.2 registers */
+		  0x00004000 0 0xd008C000 0xd008C000 0 0x00002000   /* port 1.3 registers */
+		  0x81000000 0 0	  0xc0000000 0 0x00100000   /* downstream I/O */
+		  0x82000000 0 0	  0xc1000000 0 0x08000000>; /* non-prefetchable memory */
+
+	#interrupt-cells = <1>;
+	interrupt-map-mask = <0xf800 0 0 1>;
+	interrupt-map = <0x0800 0 0 1 &mpic 58
+		         0x1000 0 0 1 &mpic 59
+			 0x1800 0 0 1 &mpic 60
+			 0x2000 0 0 1 &mpic 61
+			 0x2800 0 0 1 &mpic 62
+		         0x3000 0 0 1 &mpic 63
+			 0x3800 0 0 1 &mpic 64
+			 0x4000 0 0 1 &mpic 65
+			 0x4800 0 0 1 &mpic 99
+			 0x5000 0 0 1 &mpic 103>;
+
+	pcie@0,0 {
+		device_type = "pciex";
+		reg = <0x0800 0 0xd0040000 0 0x2000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		marvell,pcie-port = <0>;
+		marvell,pcie-lane = <0>;
+		clocks = <&gateclk 5>;
+		status = "disabled";
+	};
+
+	pcie@0,1 {
+		device_type = "pciex";
+		reg = <0x1000 0 0xd0044000 0 0x2000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		marvell,pcie-port = <0>;
+		marvell,pcie-lane = <1>;
+		clocks = <&gateclk 6>;
+		status = "disabled";
+	};
+
+	pcie@0,2 {
+		device_type = "pciex";
+		reg = <0x1800 0 0xd0048000 0 0x2000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		marvell,pcie-port = <0>;
+		marvell,pcie-lane = <2>;
+		clocks = <&gateclk 7>;
+		status = "disabled";
+	};
+
+	pcie@0,3 {
+		device_type = "pciex";
+		reg = <0x2000 0 0xd004C000 0 0x2000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		marvell,pcie-port = <0>;
+		marvell,pcie-lane = <3>;
+		clocks = <&gateclk 8>;
+		status = "disabled";
+	};
+
+	pcie@1,0 {
+		device_type = "pciex";
+		reg = <0x2800 0 0xd0080000 0 0x2000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		marvell,pcie-port = <1>;
+		marvell,pcie-lane = <0>;
+		clocks = <&gateclk 9>;
+		status = "disabled";
+	};
+
+	pcie@1,1 {
+		device_type = "pciex";
+		reg = <0x3000 0 0xd0084000 0 0x2000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		marvell,pcie-port = <1>;
+		marvell,pcie-lane = <1>;
+		clocks = <&gateclk 10>;
+		status = "disabled";
+	};
+
+	pcie@1,2 {
+		device_type = "pciex";
+		reg = <0x3800 0 0xd0088000 0 0x2000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		marvell,pcie-port = <1>;
+		marvell,pcie-lane = <2>;
+		clocks = <&gateclk 11>;
+		status = "disabled";
+	};
+
+	pcie@1,3 {
+		device_type = "pciex";
+		reg = <0x4000 0 0xd008C000 0 0x2000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		marvell,pcie-port = <1>;
+		marvell,pcie-lane = <3>;
+		clocks = <&gateclk 12>;
+		status = "disabled";
+	};
+	pcie@2,0 {
+		device_type = "pciex";
+		reg = <0x4800 0 0xd0042000 0 0x2000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		marvell,pcie-port = <2>;
+		marvell,pcie-lane = <0>;
+		clocks = <&gateclk 26>;
+		status = "disabled";
+	};
+
+	pcie@3,0 {
+		device_type = "pciex";
+		reg = <0x5000 0 0xd0082000 0 0x2000>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		marvell,pcie-port = <3>;
+		marvell,pcie-lane = <0>;
+		clocks = <&gateclk 27>;
+		status = "disabled";
+	};
+};
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index cc3a1af..03e15e7 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -1,4 +1,10 @@
 menu "PCI host controller drivers"
 	depends on PCI
 
+config PCI_MVEBU
+	bool "Marvell EBU PCIe controller"
+	depends on ARCH_MVEBU
+	select PCI_SW_HOST_BRIDGE
+	select PCI_SW_PCI_PCI_BRIDGE
+
 endmenu
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
new file mode 100644
index 0000000..34d6057
--- /dev/null
+++ b/drivers/pci/host/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
+ccflags-$(CONFIG_PCI_MVEBU) += \
+	-I$(srctree)/arch/arm/plat-orion/include \
+	-I$(srctree)/arch/arm/mach-mvebu/include
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
new file mode 100644
index 0000000..4db09e1
--- /dev/null
+++ b/drivers/pci/host/pci-mvebu.c
@@ -0,0 +1,500 @@
+/*
+ * PCIe driver for Marvell Armada 370 and Armada XP SoCs
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_pci.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <plat/pcie.h>
+#include <mach/addr-map.h>
+
+/*
+ * Those are the product IDs used for the emulated PCI Host bridge and
+ * emulated PCI-to-PCI bridges. They are temporary until we get
+ * official IDs assigned.
+ *
+ * Note that both values are written in decimal (i.e. 0x102d and
+ * 0x1092 respectively), not in hexadecimal.
+ */
+#define MARVELL_EMULATED_HOST_BRIDGE_ID    4141
+#define MARVELL_EMULATED_PCI_PCI_BRIDGE_ID 4242
+
+struct mvebu_pcie_port;
+
+/*
+ * Structure representing all PCIe interfaces: the emulated host
+ * bridge, the resources parsed from the Device Tree and the array of
+ * per-interface state.
+ */
+struct mvebu_pcie {
+	struct pci_sw_host_bridge bridge;	/* emulated PCI host bridge */
+	struct platform_device *pdev;		/* device used for dev_*() messages */
+	struct mvebu_pcie_port *ports;		/* array of nports entries */
+	struct resource io;			/* I/O range taken from DT "ranges" */
+	struct resource mem;			/* memory range taken from DT "ranges" */
+	struct resource busn;			/* bus numbers taken from DT "bus-range" */
+	int nports;				/* number of entries in ports */
+};
+
+/*
+ * Structure representing one PCIe interface, i.e. one available
+ * sub-node of the PCIe controller Device Tree node.
+ */
+struct mvebu_pcie_port {
+	void __iomem *base;		/* registers mapped with of_iomap() */
+	spinlock_t conf_lock;		/* serializes configuration space accesses */
+	int haslink;			/* 1 if the link was up at probe time */
+	u32 port;			/* DT "marvell,pcie-port" value */
+	u32 lane;			/* DT "marvell,pcie-lane" value, 0 if absent */
+	int devfn;			/* value returned by of_pci_get_devfn() */
+	struct clk *clk;		/* clock of this interface */
+	struct pci_sw_pci_bridge bridge;	/* emulated PCI-to-PCI bridge */
+	struct device_node *dn;		/* DT node of this interface */
+};
+
+/* Retrieve the driver state stored as private data of the PCI sys data. */
+static inline struct mvebu_pcie *sys_to_pcie(struct pci_sys_data *sys)
+{
+	struct mvebu_pcie *pcie = sys->private_data;
+
+	return pcie;
+}
+
+/*
+ * PCI configuration space write function.
+ *
+ * Bus 0 is the emulated bus: slot 0 function 0 is the emulated host
+ * bridge, and slots 1..nports are the emulated PCI-to-PCI bridges, one
+ * per PCIe hardware interface. Any other bus number designates a real
+ * PCIe interface, where the Linux virtual bus number is equal to the
+ * hardware PCIe interface number + 1.
+ *
+ * Returns PCIBIOS_DEVICE_NOT_FOUND when the addressed device does not
+ * exist (unknown bus or slot, or link down), otherwise the value
+ * returned by the emulated bridge or by orion_pcie_wr_conf_bus().
+ */
+static int mvebu_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
+			      int where, int size, u32 val)
+{
+	struct mvebu_pcie *pcie = sys_to_pcie(bus->sysdata);
+	struct mvebu_pcie_port *port;
+	unsigned long flags;
+	int slot = PCI_SLOT(devfn);
+	int porti, ret;
+
+	if (bus->number == 0) {
+		/* Accessing the emulated PCIe devices */
+		if (slot == 0 && PCI_FUNC(devfn) == 0)
+			return pci_sw_host_bridge_write(&pcie->bridge, where,
+							size, val);
+
+		if (slot < 1 || slot > pcie->nports)
+			return PCIBIOS_DEVICE_NOT_FOUND;
+
+		port = &pcie->ports[slot - 1];
+		return pci_sw_pci_bridge_write(&port->bridge, where,
+					       size, val);
+	}
+
+	/* Accessing a real PCIe interface */
+	porti = bus->number - 1;
+	if (porti >= pcie->nports)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	port = &pcie->ports[porti];
+
+	/* Only slot 0 exists behind an interface, and only with link up */
+	if (!port->haslink || slot != 0)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	spin_lock_irqsave(&port->conf_lock, flags);
+	ret = orion_pcie_wr_conf_bus(port->base, porti,
+				     PCI_DEVFN(1, PCI_FUNC(devfn)),
+				     where, size, val);
+	spin_unlock_irqrestore(&port->conf_lock, flags);
+
+	return ret;
+}
+
+/*
+ * PCI configuration space read function.
+ *
+ * Bus 0 hosts the emulated devices (host bridge in slot 0 function 0,
+ * one PCI-to-PCI bridge per PCIe hardware interface in the following
+ * slots). Other bus numbers are real PCIe interfaces, the hardware
+ * interface number being the Linux virtual bus number - 1.
+ *
+ * When the addressed device does not exist, *val is set to 0xffffffff
+ * and PCIBIOS_DEVICE_NOT_FOUND is returned.
+ */
+static int mvebu_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where,
+			      int size, u32 *val)
+{
+	struct mvebu_pcie *pcie = sys_to_pcie(bus->sysdata);
+	struct mvebu_pcie_port *port;
+	unsigned long flags;
+	int hwbus, ret;
+
+	if (bus->number == 0) {
+		/* Emulated host bridge */
+		if (PCI_SLOT(devfn) == 0 && PCI_FUNC(devfn) == 0)
+			return pci_sw_host_bridge_read(&pcie->bridge, where,
+						       size, val);
+
+		/* Emulated PCI-to-PCI bridges live in slots 1..nports */
+		if (PCI_SLOT(devfn) < 1 || PCI_SLOT(devfn) > pcie->nports)
+			goto not_found;
+
+		port = &pcie->ports[PCI_SLOT(devfn) - 1];
+		return pci_sw_pci_bridge_read(&port->bridge, where,
+					      size, val);
+	}
+
+	hwbus = bus->number - 1;
+	if (hwbus >= pcie->nports)
+		goto not_found;
+
+	port = &pcie->ports[hwbus];
+	if (!port->haslink || PCI_SLOT(devfn) != 0)
+		goto not_found;
+
+	spin_lock_irqsave(&port->conf_lock, flags);
+	ret = orion_pcie_rd_conf_bus(port->base, bus->number - 1,
+				     PCI_DEVFN(1, PCI_FUNC(devfn)),
+				     where, size, val);
+	spin_unlock_irqrestore(&port->conf_lock, flags);
+
+	return ret;
+
+not_found:
+	*val = 0xffffffff;
+	return PCIBIOS_DEVICE_NOT_FOUND;
+}
+
+static struct pci_ops mvebu_pcie_ops = {
+	.read = mvebu_pcie_rd_conf,	/* configuration space reads */
+	.write = mvebu_pcie_wr_conf,	/* configuration space writes */
+};
+
+/*
+ * hw_pci setup hook: register the I/O, memory and bus number resources
+ * with the PCI sys data, map the I/O space, then program the local bus
+ * number of every PCIe interface with its index and set it up.
+ *
+ * Always returns 1.
+ */
+static int __init mvebu_pcie_setup(int nr, struct pci_sys_data *sys)
+{
+	struct mvebu_pcie *pcie = sys_to_pcie(sys);
+	int porti;
+
+	pci_add_resource_offset(&sys->resources, &pcie->io, sys->io_offset);
+	pci_add_resource_offset(&sys->resources, &pcie->mem, sys->mem_offset);
+	pci_add_resource(&sys->resources, &pcie->busn);
+
+	pci_ioremap_io(nr * SZ_64K, pcie->io.start);
+
+	for (porti = 0; porti < pcie->nports; porti++) {
+		void __iomem *base = pcie->ports[porti].base;
+
+		orion_pcie_set_local_bus_nr(base, porti);
+		orion_pcie_setup(base);
+	}
+
+	return 1;
+}
+
+/*
+ * hw_pci map_irq hook: resolve the interrupt of a device sitting
+ * behind one of the real PCIe interfaces through the Device Tree
+ * interrupt mapping of the corresponding port.
+ *
+ * Returns the value of irq_create_of_mapping() on success, -1 for
+ * devices on the emulated bus 0 or on a bus that has no matching port,
+ * or the error code of of_irq_map_raw() when the lookup fails.
+ */
+static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	struct mvebu_pcie *pcie = sys_to_pcie(dev->bus->sysdata);
+	struct mvebu_pcie_port *port;
+	struct of_irq oirq;
+	__be32 laddr[3];	/* big-endian cells, as stored in the DT */
+	__be32 intspec;
+	int porti, ret;
+
+	/*
+	 * Ignore requests related to the emulated host bridge or the
+	 * emulated pci-to-pci bridges
+	 */
+	if (!dev->bus->number)
+		return -1;
+
+	/* Same bus number -> port mapping as the config space accessors */
+	porti = dev->bus->number - 1;
+	if (porti >= pcie->nports)
+		return -1;
+
+	port = &pcie->ports[porti];
+
+	/*
+	 * Build an laddr array that describes the PCI device in a DT
+	 * way
+	 */
+	laddr[0] = cpu_to_be32(port->devfn << 8);
+	laddr[1] = laddr[2] = 0;
+	intspec = cpu_to_be32(pin);
+
+	ret = of_irq_map_raw(port->dn, &intspec, 1, laddr, &oirq);
+	if (ret) {
+		dev_err(&pcie->pdev->dev,
+			"%s: of_irq_map_raw() failed, %d\n",
+			__func__, ret);
+		return ret;
+	}
+
+	return irq_create_of_mapping(oirq.controller, oirq.specifier,
+				     oirq.size);
+}
+
+/*
+ * For a given PCIe interface (represented by a mvebu_pcie_port
+ * structure), we read the PCI configuration space of the
+ * corresponding PCI-to-PCI bridge in order to find out which range of
+ * I/O addresses and memory addresses have been assigned to this PCIe
+ * interface. Using this information, we set up the appropriate
+ * address decoding windows so that the physical addresses are actually
+ * resolved to the right PCIe interface.
+ *
+ * A window is only allocated when the bridge limit is not below the
+ * bridge base. Returns 0 on success (including when no window is
+ * needed), or the error returned by armada_370_xp_alloc_pcie_window().
+ * If the memory window cannot be allocated, the I/O window allocated
+ * just before is released again.
+ */
+static int mvebu_pcie_window_config_port(struct mvebu_pcie *pcie,
+					 struct mvebu_pcie_port *port)
+{
+	bool has_io = port->bridge.iolimit >= port->bridge.iobase;
+	unsigned long iobase = 0;
+	int ret;
+
+	if (has_io) {
+		unsigned long iolimit = 0xFFF |
+			((port->bridge.iolimit & 0xF0) << 8) |
+			(port->bridge.iolimitupper << 16);
+		iobase = ((port->bridge.iobase & 0xF0) << 8) |
+			(port->bridge.iobaseupper << 16);
+		/*
+		 * NOTE(review): iolimit is an inclusive bound, so the
+		 * value passed is the window size minus one - confirm
+		 * this is what armada_370_xp_alloc_pcie_window() expects.
+		 */
+		ret = armada_370_xp_alloc_pcie_window(port->port, port->lane,
+						      iobase, iolimit-iobase,
+						      IORESOURCE_IO);
+		if (ret) {
+			dev_err(&pcie->pdev->dev,
+				"%s: could not alloc PCIe %d:%d window for I/O [0x%lx; 0x%lx]\n",
+				__func__, port->port, port->lane,
+				iobase, iolimit);
+			return ret;
+		}
+	}
+
+	if (port->bridge.memlimit >= port->bridge.membase) {
+		unsigned long membase =
+			((port->bridge.membase & 0xFFF0) << 16);
+		unsigned long memlimit =
+			((port->bridge.memlimit & 0xFFF0) << 16) | 0xFFFFF;
+		ret = armada_370_xp_alloc_pcie_window(port->port, port->lane,
+						      membase, memlimit-membase,
+						      IORESOURCE_MEM);
+		if (ret) {
+			dev_err(&pcie->pdev->dev,
+				"%s: could not alloc PCIe %d:%d window for MEM [0x%lx; 0x%lx]\n",
+				__func__, port->port, port->lane,
+				membase, memlimit);
+			goto out_free_io;
+		}
+	}
+
+	/* Success: keep the windows that have been allocated */
+	return 0;
+
+out_free_io:
+	if (has_io)
+		armada_370_xp_free_pcie_window(iobase);
+	return ret;
+}
+
+/*
+ * Configure the address decoding windows of every PCIe interface whose
+ * link is up. Stops at, and returns, the first error reported for a
+ * port; returns 0 when all ports have been handled.
+ */
+static int mvebu_pcie_window_config(struct mvebu_pcie *pcie)
+{
+	struct mvebu_pcie_port *port;
+	int porti, ret;
+
+	for (porti = 0; porti < pcie->nports; porti++) {
+		port = &pcie->ports[porti];
+
+		/* Interfaces without link do not need any window */
+		if (port->haslink) {
+			ret = mvebu_pcie_window_config_port(pcie, port);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * hw_pci align_resource hook: I/O resources are rounded up to the next
+ * 64 KB boundary, all other resources keep their start address.
+ */
+static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
+						 const struct resource *res,
+						 resource_size_t start,
+						 resource_size_t size,
+						 resource_size_t align)
+{
+	/*
+	 * The I/O regions must be 64K aligned, because the
+	 * granularity of PCIe I/O address decoding windows is 64 K
+	 */
+	if (res->flags & IORESOURCE_IO)
+		return round_up(start, SZ_64K);
+
+	return start;
+}
+
+/*
+ * Register the PCIe controller with the ARM PCI core, then set up the
+ * address decoding windows of the interfaces.
+ *
+ * This function is __init because it references the __init hooks
+ * mvebu_pcie_setup() and mvebu_pcie_map_irq(), and it is only called
+ * from the __init probe function.
+ *
+ * Returns the result of mvebu_pcie_window_config().
+ */
+static int __init mvebu_pcie_enable(struct mvebu_pcie *pcie)
+{
+	struct hw_pci hw;
+
+	memset(&hw, 0, sizeof(hw));
+
+	hw.nr_controllers = 1;
+	/* One-entry array of private data pointers: our own argument */
+	hw.private_data   = (void **)&pcie;
+	hw.setup          = mvebu_pcie_setup;
+	hw.map_irq        = mvebu_pcie_map_irq;
+	hw.align_resource = mvebu_pcie_align_resource;
+	hw.ops            = &mvebu_pcie_ops;
+
+	pci_common_init(&hw);
+
+	return mvebu_pcie_window_config(pcie);
+}
+
+/*
+ * Probe the PCIe controller: parse the I/O, memory and bus ranges from
+ * the Device Tree, create one port per available sub-node, then
+ * register everything with the PCI core.
+ *
+ * Sub-nodes that cannot be fully set up (missing property, no clock,
+ * registers that cannot be mapped) are skipped, and pcie->nports only
+ * counts the ports that have been completely initialized.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error of
+ * of_pci_parse_bus_range().
+ */
+static int __init mvebu_pcie_probe(struct platform_device *pdev)
+{
+	struct mvebu_pcie *pcie;
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *child;
+	const __be32 *range = NULL;
+	struct resource res;
+	int i, ret;
+
+	pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
+	if (!pcie)
+		return -ENOMEM;
+
+	pcie->pdev = pdev;
+
+	pci_sw_host_bridge_init(&pcie->bridge);
+	pcie->bridge.vendor = PCI_VENDOR_ID_MARVELL;
+	pcie->bridge.device = MARVELL_EMULATED_HOST_BRIDGE_ID;
+
+	/* Get the I/O and memory ranges from DT */
+	while ((range = of_pci_process_ranges(np, &res, range)) != NULL) {
+		if (resource_type(&res) == IORESOURCE_IO) {
+			memcpy(&pcie->io, &res, sizeof(res));
+			pcie->io.name = "I/O";
+		}
+		if (resource_type(&res) == IORESOURCE_MEM) {
+			memcpy(&pcie->mem, &res, sizeof(res));
+			pcie->mem.name = "MEM";
+		}
+	}
+
+	/* Get the bus range */
+	ret = of_pci_parse_bus_range(np, &pcie->busn);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to parse bus-range property: %d\n",
+			ret);
+		return ret;
+	}
+
+	/* Upper bound of the number of ports, used to size the array */
+	for_each_child_of_node(np, child) {
+		if (!of_device_is_available(child))
+			continue;
+		pcie->nports++;
+	}
+
+	pcie->ports = devm_kzalloc(&pdev->dev, pcie->nports *
+				   sizeof(*pcie->ports),
+				   GFP_KERNEL);
+	if (!pcie->ports)
+		return -ENOMEM;
+
+	/*
+	 * NOTE(review): the configuration space accessors map bus
+	 * number N to ports[N - 1], while secondary_bus is derived
+	 * from the DT devfn below; the two only agree when enabled
+	 * ports are contiguous starting from slot 1 - to be confirmed.
+	 */
+	i = 0;
+	for_each_child_of_node(np, child) {
+		struct mvebu_pcie_port *port = &pcie->ports[i];
+
+		if (!of_device_is_available(child))
+			continue;
+
+		if (of_property_read_u32(child, "marvell,pcie-port",
+					 &port->port)) {
+			dev_warn(&pdev->dev,
+				 "ignoring PCIe DT node, missing pcie-port property\n");
+			continue;
+		}
+
+		if (of_property_read_u32(child, "marvell,pcie-lane",
+					 &port->lane))
+			port->lane = 0;
+
+		port->devfn = of_pci_get_devfn(child);
+		if (port->devfn < 0)
+			continue;
+
+		/*
+		 * Get and enable the clock before touching the
+		 * registers of the interface. of_clk_get_by_name()
+		 * reports failures with an ERR_PTR(), never with NULL.
+		 */
+		port->clk = of_clk_get_by_name(child, NULL);
+		if (IS_ERR(port->clk)) {
+			dev_err(&pdev->dev, "PCIe%d.%d: cannot get clock\n",
+			       port->port, port->lane);
+			continue;
+		}
+
+		clk_prepare_enable(port->clk);
+
+		port->base = of_iomap(child, 0);
+		if (!port->base) {
+			dev_err(&pdev->dev, "PCIe%d.%d: cannot map registers\n",
+				port->port, port->lane);
+			clk_disable_unprepare(port->clk);
+			clk_put(port->clk);
+			continue;
+		}
+
+		if (orion_pcie_link_up(port->base)) {
+			port->haslink = 1;
+			dev_info(&pdev->dev, "PCIe%d.%d: link up\n",
+				 port->port, port->lane);
+		} else {
+			port->haslink = 0;
+			dev_info(&pdev->dev, "PCIe%d.%d: link down\n",
+				 port->port, port->lane);
+		}
+
+		port->dn = child;
+
+		spin_lock_init(&port->conf_lock);
+
+		pci_sw_pci_bridge_init(&port->bridge);
+		port->bridge.vendor = PCI_VENDOR_ID_MARVELL;
+		port->bridge.device = MARVELL_EMULATED_PCI_PCI_BRIDGE_ID;
+		port->bridge.primary_bus = 0;
+		port->bridge.secondary_bus = PCI_SLOT(port->devfn);
+		port->bridge.subordinate_bus = PCI_SLOT(port->devfn);
+
+		i++;
+	}
+
+	/*
+	 * Only expose the ports that have been fully initialized:
+	 * skipped sub-nodes leave unused entries with a NULL base at
+	 * the end of the array, which must not be handed to the
+	 * orion_pcie_*() helpers.
+	 */
+	pcie->nports = i;
+
+	/*
+	 * The probe is not failed on error: pci_common_init() has
+	 * already been given a pointer to the devm-allocated pcie
+	 * structure at this point, so it must stay allocated.
+	 */
+	ret = mvebu_pcie_enable(pcie);
+	if (ret)
+		dev_err(&pdev->dev,
+			"failed to set up address decoding windows: %d\n",
+			ret);
+
+	return 0;
+}
+
+static const struct of_device_id mvebu_pcie_of_match_table[] = {
+	{ .compatible = "marvell,armada-370-xp-pcie", },
+	{},	/* sentinel: empty entry terminating the table */
+};
+MODULE_DEVICE_TABLE(of, mvebu_pcie_of_match_table);
+
+static struct platform_driver mvebu_pcie_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "mvebu-pcie",
+		.of_match_table =
+		   of_match_ptr(mvebu_pcie_of_match_table),
+	},	/* no .probe here: it is passed to platform_driver_probe() */
+};
+
+/*
+ * Register the driver and probe it right away. This function must be
+ * __init as well, since it references the __init probe function.
+ */
+static int __init mvebu_pcie_init(void)
+{
+	return platform_driver_probe(&mvebu_pcie_driver,
+				     mvebu_pcie_probe);
+}
+
+subsys_initcall(mvebu_pcie_init);
+
+MODULE_AUTHOR("Thomas Petazzoni <thomas.petazzoni@free-electrons.com>");
+MODULE_DESCRIPTION("Marvell EBU PCIe driver");
+/* The file header states GPL version 2, which is spelled "GPL v2" here */
+MODULE_LICENSE("GPL v2");
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-28 18:56 ` [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems Thomas Petazzoni
@ 2013-01-28 22:21   ` Stephen Warren
  2013-01-29  8:41     ` Thomas Petazzoni
  2013-01-29  3:29   ` Bjorn Helgaas
                     ` (3 subsequent siblings)
  4 siblings, 1 reply; 216+ messages in thread
From: Stephen Warren @ 2013-01-28 22:21 UTC (permalink / raw)
  To: linux-arm-kernel
On 01/28/2013 11:56 AM, Thomas Petazzoni wrote:
> This driver implements the support for the PCIe interfaces on the
> Marvell Armada 370/XP ARM SoCs. In the future, it might be extended to
> cover earlier families of Marvell SoCs, such as Dove, Orion and
> Kirkwood.
> 
> The driver implements the hw_pci operations needed by the core ARM PCI
> code to setup PCI devices and get their corresponding IRQs, and the
> pci_ops operations that are used by the PCI core to read/write the
> configuration space of PCI devices.
> 
> Since the PCIe interfaces of Marvell SoCs are completely separate and
> not linked together in a bus, this driver sets up an emulated PCI host
> bridge, with one PCI-to-PCI bridge as child for each hardware PCIe
> interface.
> 
> In addition, this driver enumerates the different PCIe slots, and for
> those having a device plugged in, it sets up the necessary address
> decoding windows, using the new armada_370_xp_alloc_pcie_window()
> function from mach-mvebu/addr-map.c.
> diff --git a/Documentation/devicetree/bindings/pci/armada-370-xp-pcie.txt b/Documentation/devicetree/bindings/pci/armada-370-xp-pcie.txt
> +Mandatory properties:
> +- compatible: must be "marvell,armada-370-xp-pcie"
> +- status: either "disabled" or "okay"
status is a standard DT property; I certainly wouldn't expect its
presence to be mandatory (there's a defined default), nor would I expect
each device's binding to redefine this property.
> +In addition, the Device Tree node must have sub-nodes describing each
> +PCIe interface, having the following mandatory properties:
> +- marvell,pcie-port: the physical PCIe port number
Should the standardized cell-index property be used here instead? Or,
perhaps that property is deprecated/discouraged...
> +- status: either "disabled" or "okay"
Similar comment as above.
> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
> +obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
> +ccflags-$(CONFIG_PCI_MVEBU) += \
> +	-I$(srctree)/arch/arm/plat-orion/include \
> +	-I$(srctree)/arch/arm/mach-mvebu/include
That seems a little dangerous w.r.t. multi-platform zImage. Can the
required headers be moved out to somewhere more public to avoid this?
> diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
> +/*
> + * Those are the product IDs used for the emulated PCI Host bridge and
> + * emulated PCI-to-PCI bridges. They are temporary until we get
> + * official IDs assigned.
> + */
> +#define MARVELL_EMULATED_HOST_BRIDGE_ID    4141
> +#define MARVELL_EMULATED_PCI_PCI_BRIDGE_ID 4242
I assume that means we can't merge this driver yet. The cover letter
mentioned a desire to merge this for 3.9; there's not much time to get
official IDs assigned, then.
> +static int mvebu_pcie_init(void)
> +{
> +	return platform_driver_probe(&mvebu_pcie_driver,
> +				     mvebu_pcie_probe);
> +}
> +
> +subsys_initcall(mvebu_pcie_init);
Why isn't that just platform_driver_register()?
> +MODULE_LICENSE("GPL");
"GPL v2".
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-28 22:21   ` Stephen Warren
@ 2013-01-29  8:41     ` Thomas Petazzoni
  2013-01-29  9:20       ` Thierry Reding
  2013-01-29 19:47       ` Stephen Warren
  0 siblings, 2 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29  8:41 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Stephen Warren,
On Mon, 28 Jan 2013 15:21:45 -0700, Stephen Warren wrote:
> > +Mandatory properties:
> > +- compatible: must be "marvell,armada-370-xp-pcie"
> > +- status: either "disabled" or "okay"
> 
> status is a standard DT property; I certainly wouldn't expect its
> presence to be mandatory (there's a defined default), nor would I expect
> each device's binding to redefine this property.
Ok.
> > +- marvell,pcie-port: the physical PCIe port number
> 
> Should the standardized cell-index property be used here instead? Or,
> perhaps that property is deprecated/discouraged...
The problem is that I need two identifiers, the pcie-port and
pcie-lane, and it would be strange to have one referenced as
cell-index, and the other one as marvell,pcie-lane, no? Unless of
course we can put two numbers in the cell-index property, but a quick
grep in Documentation/devicetree/bindings/ seems to indicate that all
users of cell-index use it with a single identifier.
Just tell me what to do here, I don't have a strong opinion on this.
> > diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
> 
> > +obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
> > +ccflags-$(CONFIG_PCI_MVEBU) += \
> > +	-I$(srctree)/arch/arm/plat-orion/include \
> > +	-I$(srctree)/arch/arm/mach-mvebu/include
> 
> That seems a little dangerous w.r.t. multi-platform zImage. Can the
> required headers be moved out to somewhere more public to avoid this?
Why is this dangerous for multi-platform zImage? For this specific
driver only, some SoC-specific headers are used. I don't think it
prevents another PCI driver (such as the Tegra one) from being built
into the same kernel image, no?
Also, this is kind of a temporary solution, because I can't fix all the
problems in just the PCI patch series without making it horribly large.
The reasons why we need those include paths at the moment are:
 * For plat-orion/include, because of pcie.h that provides functions
   common to all Marvell SoCs regarding PCI. Ultimately, all the
   Marvell SoCs that use this common code should be migrated over to
   the DT-capable PCI driver that is submitted through this patch
   series. This will take a bit of time, and is too complex to do in
   one shot, together with the introduction of the driver itself. So
   ultimately, all the code in plat-orion/pcie.c will migrate into
   drivers/pci/host/pci-mvebu.c, and this
   plat-orion/include/plat/pcie.h file should disappear, removing the
   need for this special header path.
 * For mach-mvebu/include, because of the addr-map.h header that
   provides functions related to address decoding windows. This is also
   likely to evolve quite significantly when we'll make the PCI driver
   being used by the other Marvell SoC families (Kirkwood, Dove,
   Orion5x), and when this work is done, we can think of having a
   public header in include/linux that exposes the address decoding
   APIs, once it has stabilized a bit across the different Marvell SoC
   families.
So, the bottom line is: yes I know those include paths are not nice,
but I don't think they prevent multiplatform builds, and they are a
temporary solution until we convert more Marvell SoC families to this
new PCI driver.
> > +/*
> > + * Those are the product IDs used for the emulated PCI Host bridge and
> > + * emulated PCI-to-PCI bridges. They are temporary until we get
> > + * official IDs assigned.
> > + */
> > +#define MARVELL_EMULATED_HOST_BRIDGE_ID    4141
> > +#define MARVELL_EMULATED_PCI_PCI_BRIDGE_ID 4242
> 
> I assume that means we can't merge this driver yet. The cover letter
> mentioned a desire to merge this for 3.9; there's not much time to get
> official IDs assigned, then.
I am working on getting real IDs assigned. For now, I'd like to work on
getting all other issues fixed, and have this only problem remaining.
> 
> > +static int mvebu_pcie_init(void)
> > +{
> > +	return platform_driver_probe(&mvebu_pcie_driver,
> > +				     mvebu_pcie_probe);
> > +}
> > +
> > +subsys_initcall(mvebu_pcie_init);
> 
> Why isn't that just platform_driver_register()?
I didn't test recently, but with my first version of the patch set,
having an initialization as late as module_init() was too late. Some
PCI fixup code was being executed *before* we get the opportunity of
initializing the PCI driver, and it was crashing the kernel. I can
provide more details if you want.
> > +MODULE_LICENSE("GPL");
> 
> "GPL v2".
Sure.
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29  8:41     ` Thomas Petazzoni
@ 2013-01-29  9:20       ` Thierry Reding
  2013-01-29  9:21         ` Thomas Petazzoni
  2013-02-07 10:24         ` Thomas Petazzoni
  2013-01-29 19:47       ` Stephen Warren
  1 sibling, 2 replies; 216+ messages in thread
From: Thierry Reding @ 2013-01-29  9:20 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 09:41:43AM +0100, Thomas Petazzoni wrote:
> On Mon, 28 Jan 2013 15:21:45 -0700, Stephen Warren wrote:
[...]
> > > +static int mvebu_pcie_init(void)
> > > +{
> > > +	return platform_driver_probe(&mvebu_pcie_driver,
> > > +				     mvebu_pcie_probe);
> > > +}
> > > +
> > > +subsys_initcall(mvebu_pcie_init);
> > 
> > Why isn't that just platform_driver_register()?
> 
> I didn't test recently, but with my first version of the patch set,
> having an initialization as late as module_init() was too late. Some
> PCI fixup code was being executed *before* we get the opportunity of
> initializing the PCI driver, and it was crashing the kernel. I can
> provide more details if you want.
Does this patch perhaps fix this crash?
	http://patchwork.ozlabs.org/patch/210870/
Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 836 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20130129/579b43da/attachment.sig>
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29  9:20       ` Thierry Reding
@ 2013-01-29  9:21         ` Thomas Petazzoni
  2013-02-07 10:24         ` Thomas Petazzoni
  1 sibling, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29  9:21 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Thierry Reding,
On Tue, 29 Jan 2013 10:20:06 +0100, Thierry Reding wrote:
> Does this patch perhaps fix this crash?
> 
> 	http://patchwork.ozlabs.org/patch/210870/
I'll test it, thanks for the notice!
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29  9:20       ` Thierry Reding
  2013-01-29  9:21         ` Thomas Petazzoni
@ 2013-02-07 10:24         ` Thomas Petazzoni
  2013-02-07 15:46           ` Bjorn Helgaas
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-07 10:24 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Thierry Reding,
On Tue, 29 Jan 2013 10:20:06 +0100, Thierry Reding wrote:
> > I didn't test recently, but with my first version of the patch set,
> > having an initialization as late as module_init() was too late. Some
> > PCI fixup code was being executed *before* we get the opportunity of
> > initializing the PCI driver, and it was crashing the kernel. I can
> > provide more details if you want.
> 
> Does this patch perhaps fix this crash?
> 
> 	http://patchwork.ozlabs.org/patch/210870/
I investigated a bit more, and managed to reproduce my crash even with
your patch applied. And indeed, my crash is really unrelated to the
pcibios function disappearing. Here is the kernel panic (and a short
analysis afterwards) :
Unhandled fault: external abort on non-linefetch (0x1008) at 0xe0910010
Internal error: : 1008 [#1] SMP ARM
Modules linked in:
CPU: 0    Not tainted  (3.8.0-rc5-00029-g80e55fd-dirty #1303)
PC is at quirk_usb_handoff_xhci+0x5c/0x284
LR is at ioremap_pte_range+0x84/0xdc
pc : [<c022717c>]    lr : [<c0150944>]    psr: a0000013
sp : df82bce8  ip : df81c000  fp : c0e09dac
r10: 00008000  r9 : 00000000  r8 : de935000
r7 : e0910000  r6 : c03f0ce0  r5 : de935000  r4 : de935000
r3 : 01c801c8  r2 : 00000000  r1 : 42007e13  r0 : e0910000
Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment kernel
Control: 10c5387d  Table: 1e95c019  DAC: 00000015
Process swapper/0 (pid: 1, stack limit = 0xdf82a238)
Stack: (0xdf82bce8 to 0xdf82c000)
bce0:                   de935060 c01702f4 de935000 de935000 de935000 c03f0ce0
bd00: c0e220dc c02277f8 de935060 c02277d4 c03f0ce0 c0227858 c03f0ce0 c0178d98
bd20: c01a855c c0312dd0 00000000 00000000 de936f6c c0312ed0 c0e0f7cc de935000
bd40: de936c14 de936c00 df82bde8 00000001 de916668 c016a958 00000000 de935000
bd60: de936c14 c016ab5c de935800 de910014 de936c00 c016abcc 00000000 00000001
bd80: de910000 c016d234 de916668 de9166d0 de916640 00000000 df82be14 c017a93c
bda0: de916668 df82be14 df82bde8 c0012ef8 f3cec23f c0251c90 c0019da0 df805340
bdc0: f3cec23f df82bde8 c0e426e8 df82be14 df802b00 de912a90 00000060 df89e400
bde0: 00000002 c00131dc df82bde8 df82bde8 c0e454f0 00000000 de9166d0 de912af0
be00: df802b00 c0442010 df89e410 00000000 00000000 c0e0fcac 00000001 df82be54
be20: c04420b4 c017a90c 00000000 00000000 00000000 c0442054 c1000000 c8ffffff
be40: c124b5f0 00000200 00000000 00000000 00000000 de9166d0 df89e410 c0e43e48
be60: c0e0fc6c df89e410 00000000 c0e0fc6c c04563d4 c017a7c4 00000000 c01a8c24
be80: c01a8c0c c01a78e8 df89e410 c0e0fc6c df89e444 00000000 c043023c c01a7bd8
bea0: c0e0fc6c 00000000 c01a7b4c c01a632c df8067d8 df85a474 c0e0fc6c c0e170c8
bec0: de916740 c01a7228 c03d28bc c0e0fc6c c0e0fc6c df82a000 c0e220c0 00000000
bee0: c043023c c01a80d8 00000000 c0e0fc58 df82a000 c0e220c0 00000000 c043023c
bf00: c017a7c4 c01a8e1c c044fc80 df82a000 c0e220c0 c00086d4 c040e208 00000006
bf20: 0000007b c017a7c4 0000007b 00000006 00000006 c043023c c124bb15 00000000
bf40: c0e08194 c044fc80 00000006 c044fc60 c0e220c0 c043023c c04563d4 0000007b
bf60: 00000000 c04308b0 00000006 00000006 c043023c 00000000 c0456128 c0456128
bf80: 00000000 00000000 00000000 00000000 00000000 c0430958 00000000 00000000
bfa0: c03156c8 c03156d0 00000000 c000dfd8 00000000 00000000 00000000 00000000
bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 2254c9ef c6885425
[<c022717c>] (quirk_usb_handoff_xhci+0x5c/0x284) from [<c02277d4>] (quirk_usb_early_handoff.part.1+0xc0/0xe4)
[<c02277d4>] (quirk_usb_early_handoff.part.1+0xc0/0xe4) from [<c0178d98>] (pci_do_fixups+0xc4/0x17c)
[<c0178d98>] (pci_do_fixups+0xc4/0x17c) from [<c016a958>] (pci_bus_add_device+0x14/0x60)
[<c016a958>] (pci_bus_add_device+0x14/0x60) from [<c016ab5c>] (pci_bus_add_devices+0x44/0x128)
[<c016ab5c>] (pci_bus_add_devices+0x44/0x128) from [<c016abcc>] (pci_bus_add_devices+0xb4/0x128)
[<c016abcc>] (pci_bus_add_devices+0xb4/0x128) from [<c016d234>] (pci_scan_root_bus+0x7c/0xcc)
[<c016d234>] (pci_scan_root_bus+0x7c/0xcc) from [<c017a93c>] (mvebu_pcie_scan_bus+0x30/0x3c)
[<c017a93c>] (mvebu_pcie_scan_bus+0x30/0x3c) from [<c0012ef8>] (pcibios_init_hw+0x5c/0x15c)
[<c0012ef8>] (pcibios_init_hw+0x5c/0x15c) from [<c00131dc>] (pci_common_init+0x44/0xc4)
[<c00131dc>] (pci_common_init+0x44/0xc4) from [<c0442010>] (mvebu_pcie_probe+0x360/0x3a4)
[<c0442010>] (mvebu_pcie_probe+0x360/0x3a4) from [<c01a8c24>] (platform_drv_probe+0x18/0x1c)
[<c01a8c24>] (platform_drv_probe+0x18/0x1c) from [<c01a78e8>] (really_probe+0x60/0x1e0)
[<c01a78e8>] (really_probe+0x60/0x1e0) from [<c01a7bd8>] (__driver_attach+0x8c/0x90)
[<c01a7bd8>] (__driver_attach+0x8c/0x90) from [<c01a632c>] (bus_for_each_dev+0x50/0x7c)
[<c01a632c>] (bus_for_each_dev+0x50/0x7c) from [<c01a7228>] (bus_add_driver+0x168/0x22c)
[<c01a7228>] (bus_add_driver+0x168/0x22c) from [<c01a80d8>] (driver_register+0x78/0x144)
[<c01a80d8>] (driver_register+0x78/0x144) from [<c01a8e1c>] (platform_driver_probe+0x18/0xac)
[<c01a8e1c>] (platform_driver_probe+0x18/0xac) from [<c00086d4>] (do_one_initcall+0x34/0x174)
[<c00086d4>] (do_one_initcall+0x34/0x174) from [<c04308b0>] (do_basic_setup+0x90/0xc4)
[<c04308b0>] (do_basic_setup+0x90/0xc4) from [<c0430958>] (kernel_init_freeable+0x74/0x10c)
[<c0430958>] (kernel_init_freeable+0x74/0x10c) from [<c03156d0>] (kernel_init+0x8/0xe4)
[<c03156d0>] (kernel_init+0x8/0xe4) from [<c000dfd8>] (ret_from_fork+0x14/0x3c)
Code: e3a02000 ebf7c092 e2507000 0afffff7 (e5973010) 
---[ end trace 834a6081748c17ef ]---
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
Basically, the problem comes from the fact that the USB XHCI code has
an early PCI fixup that ioremap()s the PCI device memory BAR and
accesses it. Unfortunately, this fixup is called during
pcibios_init_hw(), at a point where the Marvell PCIe driver hasn't yet
had a chance to set up the address decoding windows (the Linux PCI core
hasn't even configured the emulated PCI-to-PCI bridges, so I don't know
where the PCI devices will sit).
Due to this, the first access to the PCI device memory by this early
fixup triggers an exception, and the kernel panics.
For some reason, moving the Marvell PCIe driver initialization to the
subsys_initcall() level works around the problem, but I'm not sure why,
since it is actually the driver initialization that ends up calling the
PCI fixup code. But clearly, with the PCIe initialization done at
subsys_initcall() time, the PCIe is initialized, and then a lot later
the PCI fixup is executed.
Ideas welcome.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 10:24         ` Thomas Petazzoni
@ 2013-02-07 15:46           ` Bjorn Helgaas
  2013-02-07 16:00             ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Bjorn Helgaas @ 2013-02-07 15:46 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 7, 2013 at 3:24 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Dear Thierry Reding,
>
> On Tue, 29 Jan 2013 10:20:06 +0100, Thierry Reding wrote:
>
>> > I didn't test recently, but with my first version of the patch set,
>> > having an initialization as late as module_init() was too late. Some
>> > PCI fixup code was being executed *before* we get the opportunity of
>> > initializing the PCI driver, and it was crashing the kernel. I can
>> > provide more details if you want.
>>
>> Does this patch perhaps fix this crash?
>>
>>       http://patchwork.ozlabs.org/patch/210870/
>
> I investigated a bit more, and managed to reproduce my crash even with
> your patch applied. And indeed, my crash is really unrelated to the
> pcibios function disappearing. Here is the kernel panic (and a short
> analysis afterwards) :
Hi Thomas,
Can you post the entire dmesg log, ideally with CONFIG_PCI_DEBUG=y?
That should have more information about the enumeration process,
including what we think the XHCI BARs are and the apertures leading to
them.
The PCI core assumes that we know the host bridge apertures up front,
and I'm not sure that is true on your platform, so maybe we'll need
some changes to accommodate that.
Bjorn
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 15:46           ` Bjorn Helgaas
@ 2013-02-07 16:00             ` Thomas Petazzoni
  2013-02-07 18:08               ` Bjorn Helgaas
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-07 16:00 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Bjorn Helgaas,
On Thu, 7 Feb 2013 08:46:06 -0700, Bjorn Helgaas wrote:
> Can you post the entire dmesg log, ideally with CONFIG_PCI_DEBUG=y?
> That should have more information about the enumeration process,
> including what we think the XHCI BARs are and the apertures leading to
> them.
Sure, see below.
> The PCI core assumes that we know the host bridge apertures up front,
> and I'm not sure that is true on your platform, so maybe we'll need
> some changes to accommodate that.
In this hardware, we need to set up the address decoding windows. So
there shouldn't be any access to a PCI device memory or I/O region
until the addresses have been assigned in the PCI-to-PCI bridge.
Note that I am now setting up the address decoding window as soon as
the address is written into the PCI-to-PCI bridge. I am no longer
waiting for the end of the enumeration process and then going through the
PCI-to-PCI bridge registers to configure them.
The system tested below is an Armada 370, it has only two PCIe links.
One is connected to a XHCI USB controller, the other one to an Intel
Wireless NIC.
First the dmesg when module_init() is used, which shows the crash:
===================================================================
Booting Linux on physical CPU 0x0
Linux version 3.8.0-rc5-00029-g80e55fd-dirty (thomas at skate) (gcc version 4.6.3 (Ubuntu/Linaro 4.6.3-1ubuntu5) ) #1313 SMP Thu Feb 7 16:53:32 CET 2013
CPU: ARMv7 Processor [561f5811] revision 1 (ARMv7), cr=10c53c7d
CPU: PIPT / VIPT nonaliasing data cache, PIPT instruction cache
Machine: Marvell Armada 370/XP (Device Tree), model: Globalscale Mirabox
bootconsole [earlycon0] enabled
Memory policy: ECC disabled, Data cache writeback
PERCPU: Embedded 7 pages/cpu @c128b000 s6464 r8192 d14016 u32768
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 130048
Kernel command line: console=ttyS0,115200 earlyprintk
PID hash table entries: 2048 (order: 1, 8192 bytes)
Dentry cache hash table entries: 65536 (order: 6, 262144 bytes)
Inode-cache hash table entries: 32768 (order: 5, 131072 bytes)
__ex_table already sorted, skipping sort
Memory: 512MB = 512MB total
Memory: 504860k/504860k available, 19428k reserved, 0K highmem
Virtual kernel memory layout:
    vector  : 0xffff0000 - 0xffff1000   (   4 kB)
    fixmap  : 0xfff00000 - 0xfffe0000   ( 896 kB)
    vmalloc : 0xe0800000 - 0xff000000   ( 488 MB)
    lowmem  : 0xc0000000 - 0xe0000000   ( 512 MB)
    pkmap   : 0xbfe00000 - 0xc0000000   (   2 MB)
    modules : 0xbf000000 - 0xbfe00000   (  14 MB)
      .text : 0xc0008000 - 0xc0467700   (4478 kB)
      .init : 0xc0468000 - 0xc0e30940   (10019 kB)
      .data : 0xc0e32000 - 0xc0e5f9c0   ( 183 kB)
       .bss : 0xc0e5f9c0 - 0xc0e84fdc   ( 150 kB)
Hierarchical RCU implementation.
	RCU restricting CPUs from NR_CPUS=4 to nr_cpu_ids=1.
NR_IRQS:16 nr_irqs:16 16
Aurora cache controller enabled
l2x0: 4 ways, CACHE_ID 0x00000100, AUX_CTRL 0x1a086302, Cache size: 262144 B
sched_clock: 32 bits at 18MHz, resolution 53ns, wraps every 229064ms
Console: colour dummy device 80x30
Calibrating delay loop... 1196.85 BogoMIPS (lpj=5984256)
pid_max: default: 32768 minimum: 301
Mount-cache hash table entries: 512
CPU: Testing write buffer coherency: ok
CPU0: thread -1, cpu 0, socket -1, mpidr 0
Setting up static identity map for 0x351a28 - 0x351a80
Brought up 1 CPUs
SMP: Total of 1 processors activated (1196.85 BogoMIPS).
devtmpfs: initialized
pinctrl core: initialized pinctrl subsystem
NET: Registered protocol family 16
DMA: preallocated 1024 KiB pool for atomic coherent allocations
irq: Cannot allocate irq_descs @ IRQ27, assuming pre-allocated
irq: Cannot allocate irq_descs @ IRQ63, assuming pre-allocated
irq: Cannot allocate irq_descs @ IRQ96, assuming pre-allocated
Initializing Coherency fabric
bio: create slab <bio-0> at 0
vgaarb: loaded
SCSI subsystem initialized
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
cfg80211: Calling CRDA to update world regulatory domain
Switching to clocksource armada_370_xp_clocksource
NET: Registered protocol family 2
TCP established hash table entries: 4096 (order: 3, 32768 bytes)
TCP bind hash table entries: 4096 (order: 3, 32768 bytes)
TCP: Hash tables configured (established 4096 bind 4096)
TCP: reno registered
UDP hash table entries: 256 (order: 1, 8192 bytes)
UDP-Lite hash table entries: 256 (order: 1, 8192 bytes)
msgmni has been set to 986
Block layer SCSI generic (bsg) driver version 0.4 loaded (major 253)
io scheduler noop registered
io scheduler deadline registered
io scheduler cfq registered (default)
armada-370-pinctrl d0018000.pinctrl: registered pinctrl driver
mvebu-pcie pcie-controller.1: PCIe0.0: link up
mvebu-pcie pcie-controller.1: PCIe1.0: link up
mvebu-pcie pcie-controller.1: PCI host bridge to bus 0000:00
pci_bus 0000:00: root bus resource [io  0xc0000000-0xc000ffff]
pci_bus 0000:00: root bus resource [mem 0xc1000000-0xc8ffffff]
pci_bus 0000:00: root bus resource [bus 00-ff]
PCI: bus0: Fast back to back transfers disabled
pci 0000:00:01.0: bridge configuration invalid ([bus 00-00]), reconfiguring
pci 0000:00:02.0: bridge configuration invalid ([bus 00-00]), reconfiguring
PCI: bus1: Fast back to back transfers disabled
PCI: bus2: Fast back to back transfers disabled
Unhandled fault: external abort on non-linefetch (0x1008) at 0xe0910010
Internal error: : 1008 [#1] SMP ARM
Modules linked in:
CPU: 0    Not tainted  (3.8.0-rc5-00029-g80e55fd-dirty #1313)
PC is at quirk_usb_handoff_xhci+0x5c/0x284
LR is at ioremap_pte_range+0x84/0xdc
pc : [<c023ba8c>]    lr : [<c0150944>]    psr: a0000013
sp : df82bce8  ip : df81c000  fp : c0e43dac
r10: 00008000  r9 : 00000000  r8 : de933000
r7 : e0910000  r6 : c04267a0  r5 : de933000  r4 : de933000
r3 : 01c801c8  r2 : 00000000  r1 : 42007e13  r0 : e0910000
Flags: NzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment kernel
Control: 10c5387d  Table: 1e960019  DAC: 00000015
Process swapper/0 (pid: 1, stack limit = 0xdf82a238)
Stack: (0xdf82bce8 to 0xdf82c000)
bce0:                   de933060 c01702f4 de933000 de933000 de933000 c04267a0
bd00: c0e5f9dc c023c108 de933060 c023c0e4 c04267a0 c023c168 c04267a0 c0178d98
bd20: c01a855c c033cab4 00000000 00000000 de935f6c c033cbb4 c0e497cc de933000
bd40: de935c14 de935c00 df82bde8 00000001 de919368 c016a958 00000000 de933000
bd60: de935c14 c016ab5c de933800 de918014 de935c00 c016abcc 00000000 00000001
bd80: de918000 c016d234 de919368 de9193d0 de919340 00000000 df82be14 c017a93c
bda0: de919368 df82be14 df82bde8 c0012ef8 f3cec23f c027b974 c0019da0 df805340
bdc0: f3cec23f df82bde8 c0e7ffe8 df82be14 df802b00 de917890 00000060 df89e400
bde0: 00000002 c00131dc df82bde8 df82bde8 c0e83318 00000000 de9193d0 de9178f0
be00: df802b00 c047a010 df89e410 00000000 00000000 c0e49cac 00000001 df82be54
be20: c047a0b4 c017a90c 00000000 00000000 00000000 c047a054 c1000000 c8ffffff
be40: c12885f0 00000200 00000000 00000000 00000000 de9193d0 df89e410 c0e81748
be60: c0e49c6c df89e410 00000000 c0e49c6c c048e9b8 c017a7c4 00000000 c01a8c24
be80: c01a8c0c c01a78e8 df89e410 c0e49c6c df89e444 00000000 c046823c c01a7bd8
bea0: c0e49c6c 00000000 c01a7b4c c01a632c df8067d8 df85a474 c0e49c6c c0e510c8
bec0: de919440 c01a7228 c0402548 c0e49c6c c0e49c6c df82a000 c0e5f9c0 00000000
bee0: c046823c c01a80d8 00000000 c0e49c58 df82a000 c0e5f9c0 00000000 c046823c
bf00: c017a7c4 c01a8e1c c0488260 df82a000 c0e5f9c0 c00086d4 c0444b18 00000006
bf20: 0000008b c017a7c4 0000008b 00000006 00000006 c046823c c1288b15 00000000
bf40: c0e42194 c0488260 00000006 c0488240 c0e5f9c0 c046823c c048e9b8 0000008b
bf60: 00000000 c04688b0 00000006 00000006 c046823c 00000000 c048e708 c048e708
bf80: 00000000 00000000 00000000 00000000 00000000 c0468958 00000000 00000000
bfa0: c033f3ac c033f3b4 00000000 c000dfd8 00000000 00000000 00000000 00000000
bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 2254c9ef c6885425
[<c023ba8c>] (quirk_usb_handoff_xhci+0x5c/0x284) from [<c023c0e4>] (quirk_usb_early_handoff.part.1+0xc0/0xe4)
[<c023c0e4>] (quirk_usb_early_handoff.part.1+0xc0/0xe4) from [<c0178d98>] (pci_do_fixups+0xc4/0x17c)
[<c0178d98>] (pci_do_fixups+0xc4/0x17c) from [<c016a958>] (pci_bus_add_device+0x14/0x60)
[<c016a958>] (pci_bus_add_device+0x14/0x60) from [<c016ab5c>] (pci_bus_add_devices+0x44/0x128)
[<c016ab5c>] (pci_bus_add_devices+0x44/0x128) from [<c016abcc>] (pci_bus_add_devices+0xb4/0x128)
[<c016abcc>] (pci_bus_add_devices+0xb4/0x128) from [<c016d234>] (pci_scan_root_bus+0x7c/0xcc)
[<c016d234>] (pci_scan_root_bus+0x7c/0xcc) from [<c017a93c>] (mvebu_pcie_scan_bus+0x30/0x3c)
[<c017a93c>] (mvebu_pcie_scan_bus+0x30/0x3c) from [<c0012ef8>] (pcibios_init_hw+0x5c/0x15c)
[<c0012ef8>] (pcibios_init_hw+0x5c/0x15c) from [<c00131dc>] (pci_common_init+0x44/0xc4)
[<c00131dc>] (pci_common_init+0x44/0xc4) from [<c047a010>] (mvebu_pcie_probe+0x360/0x3a4)
[<c047a010>] (mvebu_pcie_probe+0x360/0x3a4) from [<c01a8c24>] (platform_drv_probe+0x18/0x1c)
[<c01a8c24>] (platform_drv_probe+0x18/0x1c) from [<c01a78e8>] (really_probe+0x60/0x1e0)
[<c01a78e8>] (really_probe+0x60/0x1e0) from [<c01a7bd8>] (__driver_attach+0x8c/0x90)
[<c01a7bd8>] (__driver_attach+0x8c/0x90) from [<c01a632c>] (bus_for_each_dev+0x50/0x7c)
[<c01a632c>] (bus_for_each_dev+0x50/0x7c) from [<c01a7228>] (bus_add_driver+0x168/0x22c)
[<c01a7228>] (bus_add_driver+0x168/0x22c) from [<c01a80d8>] (driver_register+0x78/0x144)
[<c01a80d8>] (driver_register+0x78/0x144) from [<c01a8e1c>] (platform_driver_probe+0x18/0xac)
[<c01a8e1c>] (platform_driver_probe+0x18/0xac) from [<c00086d4>] (do_one_initcall+0x34/0x174)
[<c00086d4>] (do_one_initcall+0x34/0x174) from [<c04688b0>] (do_basic_setup+0x90/0xc4)
[<c04688b0>] (do_basic_setup+0x90/0xc4) from [<c0468958>] (kernel_init_freeable+0x74/0x10c)
[<c0468958>] (kernel_init_freeable+0x74/0x10c) from [<c033f3b4>] (kernel_init+0x8/0xe4)
[<c033f3b4>] (kernel_init+0x8/0xe4) from [<c000dfd8>] (ret_from_fork+0x14/0x3c)
Code: e3a02000 ebf76e4e e2507000 0afffff7 (e5973010) 
---[ end trace 7097ba2281051df7 ]---
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
===================================================================
And now, the dmesg when the PCIe driver is initialized at the
subsys_initcall() level, no crash happens, and everything works fine.
===================================================================
Booting Linux on physical CPU 0x0
Linux version 3.8.0-rc5-00029-g80e55fd-dirty (thomas at skate) (gcc version 4.6.3 (Ubuntu/Linaro 4.6.3-1ubuntu5) ) #1314 SMP Thu Feb 7 16:57:39 CET 2013
CPU: ARMv7 Processor [561f5811] revision 1 (ARMv7), cr=10c53c7d
CPU: PIPT / VIPT nonaliasing data cache, PIPT instruction cache
Machine: Marvell Armada 370/XP (Device Tree), model: Globalscale Mirabox
bootconsole [earlycon0] enabled
Memory policy: ECC disabled, Data cache writeback
PERCPU: Embedded 7 pages/cpu @c128b000 s6464 r8192 d14016 u32768
Built 1 zonelists in Zone order, mobility grouping on.  Total pages: 130048
Kernel command line: console=ttyS0,115200 earlyprintk
PID hash table entries: 2048 (order: 1, 8192 bytes)
Dentry cache hash table entries: 65536 (order: 6, 262144 bytes)
Inode-cache hash table entries: 32768 (order: 5, 131072 bytes)
__ex_table already sorted, skipping sort
Memory: 512MB = 512MB total
Memory: 504860k/504860k available, 19428k reserved, 0K highmem
Virtual kernel memory layout:
    vector  : 0xffff0000 - 0xffff1000   (   4 kB)
    fixmap  : 0xfff00000 - 0xfffe0000   ( 896 kB)
    vmalloc : 0xe0800000 - 0xff000000   ( 488 MB)
    lowmem  : 0xc0000000 - 0xe0000000   ( 512 MB)
    pkmap   : 0xbfe00000 - 0xc0000000   (   2 MB)
    modules : 0xbf000000 - 0xbfe00000   (  14 MB)
      .text : 0xc0008000 - 0xc0467700   (4478 kB)
      .init : 0xc0468000 - 0xc0e30940   (10019 kB)
      .data : 0xc0e32000 - 0xc0e5f9c0   ( 183 kB)
       .bss : 0xc0e5f9c0 - 0xc0e84fdc   ( 150 kB)
Hierarchical RCU implementation.
	RCU restricting CPUs from NR_CPUS=4 to nr_cpu_ids=1.
NR_IRQS:16 nr_irqs:16 16
Aurora cache controller enabled
l2x0: 4 ways, CACHE_ID 0x00000100, AUX_CTRL 0x1a086302, Cache size: 262144 B
sched_clock: 32 bits at 18MHz, resolution 53ns, wraps every 229064ms
Console: colour dummy device 80x30
Calibrating delay loop... 1196.85 BogoMIPS (lpj=5984256)
pid_max: default: 32768 minimum: 301
Mount-cache hash table entries: 512
CPU: Testing write buffer coherency: ok
CPU0: thread -1, cpu 0, socket -1, mpidr 0
Setting up static identity map for 0x351a28 - 0x351a80
Brought up 1 CPUs
SMP: Total of 1 processors activated (1196.85 BogoMIPS).
devtmpfs: initialized
pinctrl core: initialized pinctrl subsystem
NET: Registered protocol family 16
DMA: preallocated 1024 KiB pool for atomic coherent allocations
irq: Cannot allocate irq_descs @ IRQ27, assuming pre-allocated
irq: Cannot allocate irq_descs @ IRQ63, assuming pre-allocated
irq: Cannot allocate irq_descs @ IRQ96, assuming pre-allocated
Initializing Coherency fabric
bio: create slab <bio-0> at 0
mvebu-pcie pcie-controller.1: PCIe0.0: link up
mvebu-pcie pcie-controller.1: PCIe1.0: link up
mvebu-pcie pcie-controller.1: PCI host bridge to bus 0000:00
pci_bus 0000:00: root bus resource [io  0xc0000000-0xc000ffff]
pci_bus 0000:00: root bus resource [mem 0xc1000000-0xc8ffffff]
pci_bus 0000:00: root bus resource [bus 00-ff]
PCI: bus0: Fast back to back transfers disabled
pci 0000:00:01.0: bridge configuration invalid ([bus 00-00]), reconfiguring
pci 0000:00:02.0: bridge configuration invalid ([bus 00-00]), reconfiguring
PCI: bus1: Fast back to back transfers disabled
PCI: bus2: Fast back to back transfers disabled
Getting IRQ slot 1, pin 1...
Cannot get irq... 135
Getting IRQ slot 2, pin 1...
Cannot get irq... 135
Getting IRQ slot 1, pin 1...
Getting IRQ slot 2, pin 1...
pci 0000:00:01.0: BAR 8: assigned [mem 0xc1000000-0xc10fffff]
pci 0000:00:02.0: BAR 8: assigned [mem 0xc1100000-0xc11fffff]
pci 0000:01:00.0: BAR 0: assigned [mem 0xc1000000-0xc1001fff 64bit]
pci 0000:00:01.0: PCI bridge to [bus 01]
pci 0000:00:01.0:   bridge window [mem 0xc1000000-0xc10fffff]
pci 0000:02:00.0: BAR 0: assigned [mem 0xc1100000-0xc1107fff 64bit]
pci 0000:00:02.0: PCI bridge to [bus 02]
pci 0000:00:02.0:   bridge window [mem 0xc1100000-0xc11fffff]
PCI: enabling device 0000:00:01.0 (0140 -> 0143)
PCI: enabling device 0000:00:02.0 (0140 -> 0143)
vgaarb: loaded
SCSI subsystem initialized
usbcore: registered new interface driver usbfs
usbcore: registered new interface driver hub
usbcore: registered new device driver usb
cfg80211: Calling CRDA to update world regulatory domain
Switching to clocksource armada_370_xp_clocksource
NET: Registered protocol family 2
TCP established hash table entries: 4096 (order: 3, 32768 bytes)
TCP bind hash table entries: 4096 (order: 3, 32768 bytes)
TCP: Hash tables configured (established 4096 bind 4096)
TCP: reno registered
UDP hash table entries: 256 (order: 1, 8192 bytes)
UDP-Lite hash table entries: 256 (order: 1, 8192 bytes)
msgmni has been set to 986
Block layer SCSI generic (bsg) driver version 0.4 loaded (major 253)
io scheduler noop registered
io scheduler deadline registered
io scheduler cfq registered (default)
armada-370-pinctrl d0018000.pinctrl: registered pinctrl driver
mv_xor d0060800.xor: Marvell XOR driver
mv_xor d0060800.xor: Marvell XOR: ( xor cpy )
mv_xor d0060800.xor: Marvell XOR: ( xor fill cpy )
mv_xor d0060900.xor: Marvell XOR driver
mv_xor d0060900.xor: Marvell XOR: ( xor cpy )
mv_xor d0060900.xor: Marvell XOR: ( xor fill cpy )
Serial: 8250/16550 driver, 4 ports, IRQ sharing disabled
d0012000.serial: ttyS0 at MMIO 0xd0012000 (irq = 17) is a 8250
console [ttyS0] enabled, bootconsole disabled
console [ttyS0] enabled, bootconsole disabled
e1000e: Intel(R) PRO/1000 Network Driver - 2.1.4-k
e1000e: Copyright(c) 1999 - 2012 Intel Corporation.
libphy: orion_mdio_bus: probed
mvneta d0070000.ethernet eth0: mac: fe:4b:b2:e2:64:ca
mvneta d0074000.ethernet eth1: mac: 56:dd:1a:9e:e0:aa
Intel(R) Wireless WiFi driver for Linux, in-tree:
Copyright(c) 2003-2012 Intel Corporation
iwlwifi 0000:01:00.0: pci_enable_msi failed(0Xffffffff)
iwlwifi 0000:01:00.0: loaded firmware version 8.83.5.1 build 33692
iwlwifi 0000:01:00.0: CONFIG_IWLWIFI_DEBUG disabled
iwlwifi 0000:01:00.0: CONFIG_IWLWIFI_DEBUGFS disabled
iwlwifi 0000:01:00.0: CONFIG_IWLWIFI_DEVICE_TRACING disabled
iwlwifi 0000:01:00.0: CONFIG_IWLWIFI_DEVICE_TESTMODE disabled
iwlwifi 0000:01:00.0: CONFIG_IWLWIFI_P2P enabled
iwlwifi 0000:01:00.0: Detected Intel(R) Ultimate N WiFi Link 5300 AGN, REV=0x24
iwlwifi 0000:01:00.0: L1 Disabled; Enabling L0S
xhci_hcd 0000:02:00.0: xHCI Host Controller
xhci_hcd 0000:02:00.0: new USB bus registered, assigned bus number 1
xhci_hcd 0000:02:00.0: irq 99, io mem 0xc1100000
hub 1-0:1.0: USB hub found
hub 1-0:1.0: 2 ports detected
xhci_hcd 0000:02:00.0: xHCI Host Controller
xhci_hcd 0000:02:00.0: new USB bus registered, assigned bus number 2
hub 2-0:1.0: USB hub found
hub 2-0:1.0: 2 ports detected
Initializing USB Mass Storage driver...
usbcore: registered new interface driver usb-storage
USB Mass Storage support registered.
mousedev: PS/2 mouse device common for all mice
usbcore: registered new interface driver usbhid
usbhid: USB HID core driver
TCP: cubic registered
VFP support v0.3: implementor 56 architecture 2 part 20 variant 9 rev 6
/home/thomas/projets/linux-2.6/drivers/rtc/hctosys.c: unable to open rtc device (rtc0)
Freeing init memory: 10016K
Starting logging: OK
Starting mdev...
usb 1-1: new high-speed USB device number 2 using xhci_hcd
scsi0 : usb-storage 1-1:1.0
Initializing random number generator... done.
Starting network...
Welcome to Buildroot
buildroot login:
===================================================================
Do not hesitate to ask if you need more details about this.
Thanks!
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 16:00             ` Thomas Petazzoni
@ 2013-02-07 18:08               ` Bjorn Helgaas
  2013-02-07 18:15                 ` Jason Gunthorpe
  2013-02-07 18:43                 ` Thierry Reding
  0 siblings, 2 replies; 216+ messages in thread
From: Bjorn Helgaas @ 2013-02-07 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 7, 2013 at 9:00 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Dear Bjorn Helgaas,
>
> On Thu, 7 Feb 2013 08:46:06 -0700, Bjorn Helgaas wrote:
>
>> Can you post the entire dmesg log, ideally with CONFIG_PCI_DEBUG=y?
>> That should have more information about the enumeration process,
>> including what we think the XHCI BARs are and the apertures leading to
>> them.
>
> Sure, see below.
>
>> The PCI core assumes that we know the host bridge apertures up front,
>> and I'm not sure that is true on your platform, so maybe we'll need
>> some changes to accommodate that.
>
> In this hardware, we need to set up the address decoding windows. So
> there shouldn't be any access to a PCI device memory or I/O region
> until the addresses have been assigned in the PCI-to-PCI bridge.
I think this is the path where the crash happens (this is the same as
the backtrace you included below):
    mvebu_pcie_scan_bus
      pci_scan_root_bus
        pci_create_root_bus
        pci_scan_child_bus
        pci_bus_add_devices
          pci_bus_add_device
            pci_fixup_device(pci_fixup_final)
              quirk_usb_early_handoff           # pci_fixup_final
                quirk_usb_handoff_xhci
The problem is that we haven't assigned resources anywhere.  Normally
this is done by pci_bus_assign_resources() or
pci_assign_unassigned_bus_resources(), but I don't think there's
anything in the path above that does this.
This is not really a problem in your code; it's a generic PCI core
problem.  pci_scan_root_bus() does everything including creating the
root bus, scanning it, and adding the devices we find.  At the point
where we add a device (pci_bus_add_device()), it should be ready for a
driver to claim it -- all resource assignment should already be done.
I don't think it's completely trivial to fix this in the PCI core yet
(but we're moving in that direction) because we have some boot-time
ordering issues, e.g., x86 scans the root buses before we know about
the address space consumed by ACPI devices, so we can't just assign
the resources when we scan the bus.
I think the way you'll have to fix this in the meantime is to use
pci_create_root_bus() directly so you can do things in this sequence:
  pci_create_root_bus
  pci_scan_child_bus
  pci_bus_assign_resources
  pci_bus_add_devices
dino_probe() is an example of similar code that does this.
Bjorn
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 18:08               ` Bjorn Helgaas
@ 2013-02-07 18:15                 ` Jason Gunthorpe
  2013-02-07 18:30                   ` Bjorn Helgaas
  2013-02-07 18:43                 ` Thierry Reding
  1 sibling, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-07 18:15 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 07, 2013 at 11:08:45AM -0700, Bjorn Helgaas wrote:
 
> I think the way you'll have to fix this in the meantime is to use
> pci_create_root_bus() directly so you can do things in this sequence:
> 
>   pci_create_root_bus
>   pci_scan_child_bus
>   pci_bus_assign_resources
>   pci_bus_add_devices
> 
> dino_probe() is an example of similar code that does this.
Erm, but isn't this problem going to strike any system that doesn't
have resources assigned by the firmware?
If so, a common 'pci_scan_bus_from_scratch' would at least highlight
the problem and centralize the workaround until it is fixed...
Many embedded systems will require this..
Regards,
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 18:15                 ` Jason Gunthorpe
@ 2013-02-07 18:30                   ` Bjorn Helgaas
  0 siblings, 0 replies; 216+ messages in thread
From: Bjorn Helgaas @ 2013-02-07 18:30 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 7, 2013 at 11:15 AM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Thu, Feb 07, 2013 at 11:08:45AM -0700, Bjorn Helgaas wrote:
>
>> I think the way you'll have to fix this in the meantime is to use
>> pci_create_root_bus() directly so you can do things in this sequence:
>>
>>   pci_create_root_bus
>>   pci_scan_child_bus
>>   pci_bus_assign_resources
>>   pci_bus_add_devices
>>
>> dino_probe() is an example of similar code that does this.
>
> Erm, but isn't this problem going to strike any system that doesn't
> have resources assigned by the firmware?
>
> If so, a common 'pci_scan_bus_from_scratch' would at least highlight
> the problem and centralize the work around until it is fixed...
>
> Many embedded systems will require this..
Yep, agreed on all counts.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 18:08               ` Bjorn Helgaas
  2013-02-07 18:15                 ` Jason Gunthorpe
@ 2013-02-07 18:43                 ` Thierry Reding
  1 sibling, 0 replies; 216+ messages in thread
From: Thierry Reding @ 2013-02-07 18:43 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 07, 2013 at 11:08:45AM -0700, Bjorn Helgaas wrote:
> On Thu, Feb 7, 2013 at 9:00 AM, Thomas Petazzoni
> <thomas.petazzoni@free-electrons.com> wrote:
> > Dear Bjorn Helgaas,
> >
> > On Thu, 7 Feb 2013 08:46:06 -0700, Bjorn Helgaas wrote:
> >
> >> Can you post the entire dmesg log, ideally with CONFIG_PCI_DEBUG=y?
> >> That should have more information about the enumeration process,
> >> including what we think the XHCI BARs are and the apertures leading to
> >> them.
> >
> > Sure, see below.
> >
> >> The PCI core assumes that we know the host bridge apertures up front,
> >> and I'm not sure that is true on your platform, so maybe we'll need
> >> some changes to accommodate that.
> >
> > In this hardware, we need to set up the address decoding windows. So
> > there shouldn't be any access to a PCI device memory or I/O region
> > until the addresses have been assigned in the PCI-to-PCI bridge.
> 
> I think this is the path where the crash happens (this is the same as
> the backtrace you included below):
> 
>     mvebu_pcie_scan_bus
>       pci_scan_root_bus
>         pci_create_root_bus
>         pci_scan_child_bus
>         pci_bus_add_devices
>           pci_bus_add_device
>             pci_fixup_device(pci_fixup_final)
>               quirk_usb_early_handoff           # pci_fixup_final
>                 quirk_usb_handoff_xhci
> 
> The problem is that we haven't assigned resources anywhere.  Normally
> this is done by pci_bus_assign_resources() or
> pci_assign_unassigned_bus_resources(), but I don't think there's
> anything in the path above that does this.
> 
> This is not really a problem in your code; it's a generic PCI core
> problem.  pci_scan_root_bus() does everything including creating the
> root bus, scanning it, and adding the devices we find.  At the point
> where we add a device (pci_bus_add_device()), it should be ready for a
> driver to claim it -- all resource assignment should already be done.
> 
> I don't think it's completely trivial to fix this in the PCI core yet
> (but we're moving in that direction) because we have some boot-time
> ordering issues, e.g., x86 scans the root buses before we know about
> the address space consumed by ACPI devices, so we can't just assign
> the resources when we scan the bus.
> 
> I think the way you'll have to fix this in the meantime is to use
> pci_create_root_bus() directly so you can do things in this sequence:
> 
>   pci_create_root_bus
>   pci_scan_child_bus
>   pci_bus_assign_resources
>   pci_bus_add_devices
The last two are already done by ARM's pci_common_init(). On Tegra I've
got by having a custom .scan_bus() implementation that does the first
two. Back when I used pci_scan_root_bus(), we were seeing some issues
with resource conflicts and such, and there was a similar discussion at
that time.
Thomas, have you tried using the same .scan_bus() that I use on Tegra?
It should be easy to port it to Marvell, the only Tegra-specific bit is
how to get at the struct tegra_pcie.
Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 836 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20130207/d1055f0b/attachment.sig>
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29  8:41     ` Thomas Petazzoni
  2013-01-29  9:20       ` Thierry Reding
@ 2013-01-29 19:47       ` Stephen Warren
  1 sibling, 0 replies; 216+ messages in thread
From: Stephen Warren @ 2013-01-29 19:47 UTC (permalink / raw)
  To: linux-arm-kernel
On 01/29/2013 01:41 AM, Thomas Petazzoni wrote:
> Dear Stephen Warren,
> 
> On Mon, 28 Jan 2013 15:21:45 -0700, Stephen Warren wrote:
> 
>>> +Mandatory properties:
>>> +- compatible: must be "marvell,armada-370-xp-pcie"
>>> +- status: either "disabled" or "okay"
>>
>> status is a standard DT property; I certainly wouldn't expect its
>> presence to be mandatory (there's a defined default), nor would I expect
>> each device's binding to redefine this property.
> 
> Ok.
> 
>>> +- marvell,pcie-port: the physical PCIe port number
>>
>> Should the standardized cell-index property be used here instead? Or,
>> perhaps that property is deprecated/discouraged...
> 
> The problem is that I need two identifiers, the pcie-port and
> pcie-lane, and it would be strange to have one referenced as
> cell-index, and the other one as marvell,pcie-lane, no?
Yes, using a custom property for half of the information and a standard
property for the other half would be odd.
> Unless of
> course we can put two numbers in the cell-index property, but a quick
> grep in Documentation/devicetree/bindings/ seems to indicate that all
> users of cell-index use it with a single identifier.
> 
> Just tell me what to do here, I don't have a strong opinion on this.
It's probably fine as-is then. Although I wasn't sure exactly what
port/lane meant; is there some kind of mux/cross-bar between the PCIe
root ports and the physical lanes/balls/pins on the chip?
>>> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
>>
>>> +obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
>>> +ccflags-$(CONFIG_PCI_MVEBU) += \
>>> +	-I$(srctree)/arch/arm/plat-orion/include \
>>> +	-I$(srctree)/arch/arm/mach-mvebu/include
>>
>> That seems a little dangerous w.r.t. multi-platform zImage. Can the
>> required headers be moved out to somewhere more public to avoid this?
> 
> Why is this dangerous for multi-platform zImage? For this specific
> driver only, some SoC-specific headers are used. I don't think it
> prevents another PCI driver (such as the Tegra one) from being built
> into the same kernel image, no?
Aren't those ccflags applied to everything that's built by that
Makefile? If they were applied only to one .o file, it'd probably be OK,
but I don't see how that's specified.
I'm not especially bothered by reaching into the mach/plat include
directories, since you're well aware it needs cleaning up; I
just don't think that Tegra's PCIe driver is going to compile too well
against an Orion/mvebu header if one were to get picked up first.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-28 18:56 ` [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems Thomas Petazzoni
  2013-01-28 22:21   ` Stephen Warren
@ 2013-01-29  3:29   ` Bjorn Helgaas
  2013-01-29  5:55     ` Jason Gunthorpe
  2013-01-29 13:22   ` Andrew Murray
                     ` (2 subsequent siblings)
  4 siblings, 1 reply; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-29  3:29 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 11:56 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> This driver implements the support for the PCIe interfaces on the
> Marvell Armada 370/XP ARM SoCs. In the future, it might be extended to
> cover earlier families of Marvell SoCs, such as Dove, Orion and
> Kirkwood.
>
> The driver implements the hw_pci operations needed by the core ARM PCI
> code to setup PCI devices and get their corresponding IRQs, and the
> pci_ops operations that are used by the PCI core to read/write the
> configuration space of PCI devices.
>
> Since the PCIe interfaces of Marvell SoCs are completely separate and
> not linked together in a bus, this driver sets up an emulated PCI host
> bridge, with one PCI-to-PCI bridge as child for each hardware PCIe
> interface.
There's no Linux requirement that multiple PCIe interfaces appear to
be in the same hierarchy.  You can just use pci_scan_root_bus()
separately on each interface.  Each interface can be in its own domain
if necessary.
> In addition, this driver enumerates the different PCIe slots, and for
> those having a device plugged in, it sets up the necessary address
> decoding windows, using the new armada_370_xp_alloc_pcie_window()
> function from mach-mvebu/addr-map.c.
>
> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
> ---
>  .../devicetree/bindings/pci/armada-370-xp-pcie.txt |  175 +++++++
>  drivers/pci/host/Kconfig                           |    6 +
>  drivers/pci/host/Makefile                          |    4 +
>  drivers/pci/host/pci-mvebu.c                       |  500 ++++++++++++++++++++
>  4 files changed, 685 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/pci/armada-370-xp-pcie.txt
>  create mode 100644 drivers/pci/host/Makefile
>  create mode 100644 drivers/pci/host/pci-mvebu.c
>
> diff --git a/Documentation/devicetree/bindings/pci/armada-370-xp-pcie.txt b/Documentation/devicetree/bindings/pci/armada-370-xp-pcie.txt
> new file mode 100644
> index 0000000..9313e92
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/pci/armada-370-xp-pcie.txt
> @@ -0,0 +1,175 @@
> +* Marvell Armada 370/XP PCIe interfaces
> +
> +Mandatory properties:
> +- compatible: must be "marvell,armada-370-xp-pcie"
> +- status: either "disabled" or "okay"
> +- #address-cells, set to <3>
> +- #size-cells, set to <2>
> +- #interrupt-cells, set to <1>
> +- bus-range: PCI bus numbers covered
> +- ranges: standard PCI-style address ranges, describing the PCIe
> +  registers for each PCIe interface, and then ranges for the PCI
> +  memory and I/O regions.
> +- interrupt-map-mask and interrupt-map are standard PCI Device Tree
> +  properties to describe the interrupts associated to each PCI
> +  interface.
> +
> +In addition, the Device Tree node must have sub-nodes describing each
> +PCIe interface, having the following mandatory properties:
> +- reg: the address and size of the PCIe registers (translated
> +  addresses according to the ranges property of the parent)
> +- clocks: the clock associated to this PCIe interface
> +- marvell,pcie-port: the physical PCIe port number
> +- status: either "disabled" or "okay"
> +
> +and the following optional properties:
> +- marvell,pcie-lane: the physical PCIe lane number, for ports having
> +  multiple lanes. If this property is not found, we assume that the
> +  value is 0.
> +
> +Example:
> +
> +pcie-controller {
> +       compatible = "marvell,armada-370-xp-pcie";
> +       status = "disabled";
> +
> +       #address-cells = <3>;
> +       #size-cells = <2>;
> +
> +       bus-range = <0x00 0xff>;
> +
> +       ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
> +                 0x00004800 0 0xd0042000 0xd0042000 0 0x00002000   /* port 2.0 registers */
> +                 0x00001000 0 0xd0044000 0xd0044000 0 0x00002000   /* port 0.1 registers */
> +                 0x00001800 0 0xd0048000 0xd0048000 0 0x00002000   /* port 0.2 registers */
> +                 0x00002000 0 0xd004C000 0xd004C000 0 0x00002000   /* port 0.3 registers */
> +                 0x00002800 0 0xd0080000 0xd0080000 0 0x00002000   /* port 1.0 registers */
> +                 0x00005000 0 0xd0082000 0xd0082000 0 0x00002000   /* port 3.0 registers */
> +                 0x00003000 0 0xd0084000 0xd0084000 0 0x00002000   /* port 1.1 registers */
> +                 0x00003800 0 0xd0088000 0xd0088000 0 0x00002000   /* port 1.2 registers */
> +                 0x00004000 0 0xd008C000 0xd008C000 0 0x00002000   /* port 1.3 registers */
> +                 0x81000000 0 0          0xc0000000 0 0x00100000   /* downstream I/O */
> +                 0x82000000 0 0          0xc1000000 0 0x08000000>; /* non-prefetchable memory */
> +
> +       #interrupt-cells = <1>;
> +       interrupt-map-mask = <0xf800 0 0 1>;
> +       interrupt-map = <0x0800 0 0 1 &mpic 58
> +                        0x1000 0 0 1 &mpic 59
> +                        0x1800 0 0 1 &mpic 60
> +                        0x2000 0 0 1 &mpic 61
> +                        0x2800 0 0 1 &mpic 62
> +                        0x3000 0 0 1 &mpic 63
> +                        0x3800 0 0 1 &mpic 64
> +                        0x4000 0 0 1 &mpic 65
> +                        0x4800 0 0 1 &mpic 99
> +                        0x5000 0 0 1 &mpic 103>;
> +
> +       pcie@0,0 {
> +               device_type = "pciex";
> +               reg = <0x0800 0 0xd0040000 0 0x2000>;
> +               #address-cells = <3>;
> +               #size-cells = <2>;
> +               marvell,pcie-port = <0>;
> +               marvell,pcie-lane = <0>;
> +               clocks = <&gateclk 5>;
> +               status = "disabled";
> +       };
> +
> +       pcie@0,1 {
> +               device_type = "pciex";
> +               reg = <0x1000 0 0xd0044000 0 0x2000>;
> +               #address-cells = <3>;
> +               #size-cells = <2>;
> +               marvell,pcie-port = <0>;
> +               marvell,pcie-lane = <1>;
> +               clocks = <&gateclk 6>;
> +               status = "disabled";
> +       };
> +
> +       pcie@0,2 {
> +               device_type = "pciex";
> +               reg = <0x1800 0 0xd0048000 0 0x2000>;
> +               #address-cells = <3>;
> +               #size-cells = <2>;
> +               marvell,pcie-port = <0>;
> +               marvell,pcie-lane = <2>;
> +               clocks = <&gateclk 7>;
> +               status = "disabled";
> +       };
> +
> +       pcie@0,3 {
> +               device_type = "pciex";
> +               reg = <0x2000 0 0xd004C000 0 0xC000>;
> +               #address-cells = <3>;
> +               #size-cells = <2>;
> +               marvell,pcie-port = <0>;
> +               marvell,pcie-lane = <3>;
> +               clocks = <&gateclk 8>;
> +               status = "disabled";
> +       };
> +
> +       pcie@1,0 {
> +               device_type = "pciex";
> +               reg = <0x2800 0 0xd0080000 0 0x2000>;
> +               #address-cells = <3>;
> +               #size-cells = <2>;
> +               marvell,pcie-port = <1>;
> +               marvell,pcie-lane = <0>;
> +               clocks = <&gateclk 9>;
> +               status = "disabled";
> +       };
> +
> +       pcie@1,1 {
> +               device_type = "pciex";
> +               reg = <0x3000 0 0xd0084000 0 0x2000>;
> +               #address-cells = <3>;
> +               #size-cells = <2>;
> +               marvell,pcie-port = <1>;
> +               marvell,pcie-lane = <1>;
> +               clocks = <&gateclk 10>;
> +               status = "disabled";
> +       };
> +
> +       pcie@1,2 {
> +               device_type = "pciex";
> +               reg = <0x3800 0 0xd0088000 0 0x2000>;
> +               #address-cells = <3>;
> +               #size-cells = <2>;
> +               marvell,pcie-port = <1>;
> +               marvell,pcie-lane = <2>;
> +               clocks = <&gateclk 11>;
> +               status = "disabled";
> +       };
> +
> +       pcie@1,3 {
> +               device_type = "pciex";
> +               reg = <0x4000 0 0xd008C000 0 0x2000>;
> +               #address-cells = <3>;
> +               #size-cells = <2>;
> +               marvell,pcie-port = <1>;
> +               marvell,pcie-lane = <3>;
> +               clocks = <&gateclk 12>;
> +               status = "disabled";
> +       };
> +       pcie@2,0 {
> +               device_type = "pciex";
> +               reg = <0x4800 0 0xd0042000 0 0x2000>;
> +               #address-cells = <3>;
> +               #size-cells = <2>;
> +               marvell,pcie-port = <2>;
> +               marvell,pcie-lane = <0>;
> +               clocks = <&gateclk 26>;
> +               status = "disabled";
> +       };
> +
> +       pcie@3,0 {
> +               device_type = "pciex";
> +               reg = <0x5000 0 0xd0082000 0 0x2000>;
> +               #address-cells = <3>;
> +               #size-cells = <2>;
> +               marvell,pcie-port = <3>;
> +               marvell,pcie-lane = <0>;
> +               clocks = <&gateclk 27>;
> +               status = "disabled";
> +       };
> +};
> diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
> index cc3a1af..03e15e7 100644
> --- a/drivers/pci/host/Kconfig
> +++ b/drivers/pci/host/Kconfig
> @@ -1,4 +1,10 @@
>  menu "PCI host controller drivers"
>         depends on PCI
>
> +config PCI_MVEBU
> +       bool "Marvell EBU PCIe controller"
> +       depends on ARCH_MVEBU
> +       select PCI_SW_HOST_BRIDGE
> +       select PCI_SW_PCI_PCI_BRIDGE
> +
>  endmenu
> diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
> new file mode 100644
> index 0000000..34d6057
> --- /dev/null
> +++ b/drivers/pci/host/Makefile
> @@ -0,0 +1,4 @@
> +obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
> +ccflags-$(CONFIG_PCI_MVEBU) += \
> +       -I$(srctree)/arch/arm/plat-orion/include \
> +       -I$(srctree)/arch/arm/mach-mvebu/include
> diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
> new file mode 100644
> index 0000000..4db09e1
> --- /dev/null
> +++ b/drivers/pci/host/pci-mvebu.c
> @@ -0,0 +1,500 @@
> +/*
> + * PCIe driver for Marvell Armada 370 and Armada XP SoCs
> + *
> + * This file is licensed under the terms of the GNU General Public
> + * License version 2.  This program is licensed "as is" without any
> + * warranty of any kind, whether express or implied.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/pci.h>
> +#include <linux/clk.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/platform_device.h>
> +#include <linux/of_address.h>
> +#include <linux/of_pci.h>
> +#include <linux/of_irq.h>
> +#include <linux/of_platform.h>
> +#include <plat/pcie.h>
> +#include <mach/addr-map.h>
> +
> +/*
> + * Those are the product IDs used for the emulated PCI Host bridge and
> + * emulated PCI-to-PCI bridges. They are temporary until we get
> + * official IDs assigned.
> + */
> +#define MARVELL_EMULATED_HOST_BRIDGE_ID    4141
> +#define MARVELL_EMULATED_PCI_PCI_BRIDGE_ID 4242
> +
> +struct mvebu_pcie_port;
> +
> +/* Structure representing all PCIe interfaces */
> +struct mvebu_pcie {
> +       struct pci_sw_host_bridge bridge;
> +       struct platform_device *pdev;
> +       struct mvebu_pcie_port *ports;
> +       struct resource io;
> +       struct resource mem;
> +       struct resource busn;
> +       int nports;
> +};
> +
> +/* Structure representing one PCIe interface */
> +struct mvebu_pcie_port {
> +       void __iomem *base;
> +       spinlock_t conf_lock;
> +       int haslink;
> +       u32 port;
> +       u32 lane;
> +       int devfn;
> +       struct clk *clk;
> +       struct pci_sw_pci_bridge bridge;
> +       struct device_node *dn;
> +};
> +
> +static inline struct mvebu_pcie *sys_to_pcie(struct pci_sys_data *sys)
> +{
> +       return sys->private_data;
> +}
> +
> +/* PCI configuration space write function */
> +static int mvebu_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
> +                             int where, int size, u32 val)
> +{
> +       struct mvebu_pcie *pcie = sys_to_pcie(bus->sysdata);
> +
> +       if (bus->number != 0) {
> +               /*
> +                * Accessing a real PCIe interface, where the Linux
> +                * virtual bus number is equal to the hardware PCIe
> +                * interface number + 1
> +                */
This is really weird.  It doesn't seem like a good idea to me, but I
don't understand the whole architecture.
> +               struct mvebu_pcie_port *port;
> +               unsigned long flags;
> +               int porti, ret;
> +
> +               porti = bus->number - 1;
> +               if (porti >= pcie->nports)
> +                       return PCIBIOS_DEVICE_NOT_FOUND;
> +
> +               port = &pcie->ports[porti];
> +
> +               if (!port->haslink)
> +                       return PCIBIOS_DEVICE_NOT_FOUND;
> +
> +               if (PCI_SLOT(devfn) != 0)
> +                       return PCIBIOS_DEVICE_NOT_FOUND;
> +
> +               spin_lock_irqsave(&port->conf_lock, flags);
> +               ret = orion_pcie_wr_conf_bus(port->base, bus->number - 1,
> +                                            PCI_DEVFN(1, PCI_FUNC(devfn)),
> +                                            where, size, val);
> +               spin_unlock_irqrestore(&port->conf_lock, flags);
> +
> +               return ret;
> +       } else {
> +               /*
> +                * Accessing the emulated PCIe devices. In the first
> +                * slot, the emulated host bridge, and in the next
> +                * slots, the PCI-to-PCI bridges that correspond to
> +                * each PCIe hardware interface
> +                */
> +               if (PCI_SLOT(devfn) == 0 && PCI_FUNC(devfn) == 0)
> +                       return pci_sw_host_bridge_write(&pcie->bridge, where,
> +                                                       size, val);
> +               else if (PCI_SLOT(devfn) >= 1 &&
> +                        PCI_SLOT(devfn) <= pcie->nports) {
> +                       struct mvebu_pcie_port *port;
> +                       int porti = PCI_SLOT(devfn) - 1;
> +                       port = &pcie->ports[porti];
> +                       return pci_sw_pci_bridge_write(&port->bridge, where,
> +                                                      size, val);
> +               } else {
> +                       return PCIBIOS_DEVICE_NOT_FOUND;
> +               }
> +       }
> +
> +       return PCIBIOS_SUCCESSFUL;
> +}
> +
> +/* PCI configuration space read function */
> +static int mvebu_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where,
> +                             int size, u32 *val)
> +{
> +       struct mvebu_pcie *pcie = sys_to_pcie(bus->sysdata);
> +
> +       if (bus->number != 0) {
> +               /*
> +                * Accessing a real PCIe interface, where the Linux
> +                * virtual bus number is equal to the hardware PCIe
> +                * interface number + 1
> +                */
> +               struct mvebu_pcie_port *port;
> +               unsigned long flags;
> +               int porti, ret;
> +
> +               porti = bus->number - 1;
> +               if (porti >= pcie->nports) {
> +                       *val = 0xffffffff;
> +                       return PCIBIOS_DEVICE_NOT_FOUND;
> +               }
> +
> +               port = &pcie->ports[porti];
> +
> +               if (!port->haslink || PCI_SLOT(devfn) != 0) {
> +                       *val = 0xffffffff;
> +                       return PCIBIOS_DEVICE_NOT_FOUND;
> +               }
> +
> +               spin_lock_irqsave(&port->conf_lock, flags);
> +               ret = orion_pcie_rd_conf_bus(port->base, bus->number - 1,
> +                                            PCI_DEVFN(1, PCI_FUNC(devfn)),
> +                                            where, size, val);
> +               spin_unlock_irqrestore(&port->conf_lock, flags);
> +
> +               return ret;
> +       } else {
> +               /*
> +                * Accessing the emulated PCIe devices. In the first
> +                * slot, the emulated host bridge, and in the next
> +                * slots, the PCI-to-PCI bridges that correspond to
> +                * each PCIe hardware interface
> +                */
> +               if (PCI_SLOT(devfn) == 0 && PCI_FUNC(devfn) == 0)
> +                       return pci_sw_host_bridge_read(&pcie->bridge, where,
> +                                                      size, val);
> +               else if (PCI_SLOT(devfn) >= 1 &&
> +                        PCI_SLOT(devfn) <= pcie->nports) {
> +                       struct mvebu_pcie_port *port;
> +                       int porti = PCI_SLOT(devfn) - 1;
> +                       port = &pcie->ports[porti];
> +                       return pci_sw_pci_bridge_read(&port->bridge, where,
> +                                                     size, val);
> +               } else {
> +                       *val = 0xffffffff;
> +                       return PCIBIOS_DEVICE_NOT_FOUND;
> +               }
> +       }
> +}
> +
> +static struct pci_ops mvebu_pcie_ops = {
> +       .read = mvebu_pcie_rd_conf,
> +       .write = mvebu_pcie_wr_conf,
> +};
> +
> +static int __init mvebu_pcie_setup(int nr, struct pci_sys_data *sys)
> +{
> +       struct mvebu_pcie *pcie = sys_to_pcie(sys);
> +       int i;
> +
> +       pci_add_resource_offset(&sys->resources, &pcie->io, sys->io_offset);
> +       pci_add_resource_offset(&sys->resources, &pcie->mem, sys->mem_offset);
> +       pci_add_resource(&sys->resources, &pcie->busn);
> +
> +       pci_ioremap_io(nr * SZ_64K, pcie->io.start);
> +
> +       for (i = 0; i < pcie->nports; i++) {
> +               struct mvebu_pcie_port *port = &pcie->ports[i];
> +               orion_pcie_set_local_bus_nr(port->base, i);
> +               orion_pcie_setup(port->base);
> +       }
> +
> +       return 1;
> +}
> +
> +static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
> +{
> +       struct mvebu_pcie *pcie = sys_to_pcie(dev->bus->sysdata);
> +       struct mvebu_pcie_port *port;
> +       struct of_irq oirq;
> +       u32 laddr[3];
> +       int ret;
> +       __be32 intspec;
> +
> +       /*
> +        * Ignore requests related to the emulated host bridge or the
> +        * emulated pci-to-pci bridges
> +        */
> +       if (!dev->bus->number)
> +               return -1;
> +
> +       port = &pcie->ports[dev->bus->number - 1];
> +
> +       /*
> +        * Build an laddr array that describes the PCI device in a DT
> +        * way
> +        */
> +       laddr[0] = cpu_to_be32(port->devfn << 8);
> +       laddr[1] = laddr[2] = 0;
> +       intspec = cpu_to_be32(pin);
> +
> +       ret = of_irq_map_raw(port->dn, &intspec, 1, laddr, &oirq);
> +       if (ret) {
> +               dev_err(&pcie->pdev->dev,
> +                       "%s: of_irq_map_raw() failed, %d\n",
> +                       __func__, ret);
> +               return ret;
> +       }
> +
> +       return irq_create_of_mapping(oirq.controller, oirq.specifier,
> +                                    oirq.size);
> +}
> +
> +/*
> + * For a given PCIe interface (represented by a mvebu_pcie_port
> + * structure), we read the PCI configuration space of the
> + * corresponding PCI-to-PCI bridge in order to find out which range of
> + * I/O addresses and memory addresses have been assigned to this PCIe
> + * interface. Using this information, we set up the appropriate
> + * address decoding windows so that the physical addresses are actually
> + * resolved to the right PCIe interface.
> + */
Are you inferring the host bridge apertures by using the resources
assigned to devices under the bridge, i.e., taking the union of all
the BARs and PCI-to-PCI bridge apertures of devices on the root bus?
If so, it would be much better to learn the host bridge apertures via
some non-PCI mechanism that corresponds to the actual hardware
configuration.  The config of devices below the host bridge gives you
hints, of course, but usually they don't consume all the available
space.
> +static int mvebu_pcie_window_config_port(struct mvebu_pcie *pcie,
> +                                        struct mvebu_pcie_port *port)
> +{
> +       unsigned long iobase = 0;
> +       int ret;
> +
> +       if (port->bridge.iolimit >= port->bridge.iobase) {
> +               unsigned long iolimit = 0xFFF |
> +                       ((port->bridge.iolimit & 0xF0) << 8) |
> +                       (port->bridge.iolimitupper << 16);
> +               iobase = ((port->bridge.iobase & 0xF0) << 8) |
> +                       (port->bridge.iobaseupper << 16);
> +               ret = armada_370_xp_alloc_pcie_window(port->port, port->lane,
> +                                                     iobase, iolimit-iobase,
> +                                                     IORESOURCE_IO);
> +               if (ret) {
> +                       dev_err(&pcie->pdev->dev,
> +                               "%s: could not alloc PCIe %d:%d window for I/O [0x%lx; 0x%lx]\n",
> +                               __func__, port->port, port->lane,
> +                               iobase, iolimit);
> +                       goto out_io;
> +               }
> +       }
> +
> +       if (port->bridge.memlimit >= port->bridge.membase) {
> +               unsigned long membase =
> +                       ((port->bridge.membase & 0xFFF0) << 16);
> +               unsigned long memlimit =
> +                       ((port->bridge.memlimit & 0xFFF0) << 16) | 0xFFFFF;
> +               ret = armada_370_xp_alloc_pcie_window(port->port, port->lane,
> +                                                     membase, memlimit-membase,
> +                                                     IORESOURCE_MEM);
> +               if (ret) {
> +                       dev_err(&pcie->pdev->dev,
> +                               "%s: could not alloc PCIe %d:%d window for MEM [0x%lx; 0x%lx]\n",
> +                               __func__, port->port, port->lane,
> +                               membase, memlimit);
> +                       goto out_mem;
> +               }
> +       }
> +
> +out_mem:
> +       if (port->bridge.iolimit >= port->bridge.iobase)
> +               armada_370_xp_free_pcie_window(iobase);
> +out_io:
> +       return ret;
> +}
> +
> +/*
> + * Set up the address decoding windows for all PCIe interfaces.
> + */
> +static int mvebu_pcie_window_config(struct mvebu_pcie *pcie)
> +{
> +       int i, ret;
> +
> +       for (i = 0; i < pcie->nports; i++) {
> +               struct mvebu_pcie_port *port = &pcie->ports[i];
> +               if (!port->haslink)
> +                       continue;
> +
> +               ret = mvebu_pcie_window_config_port(pcie, port);
> +               if (ret)
> +                       return ret;
> +       }
> +
> +       return 0;
> +}
> +
> +static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
> +                                                const struct resource *res,
> +                                                resource_size_t start,
> +                                                resource_size_t size,
> +                                                resource_size_t align)
> +{
> +       if (!(res->flags & IORESOURCE_IO))
> +               return start;
> +
> +       /*
> +        * The I/O regions must be 64K aligned, because the
> +        * granularity of PCIe I/O address decoding windows is 64 K
> +        */
> +       return round_up(start, SZ_64K);
> +}
> +
> +static int mvebu_pcie_enable(struct mvebu_pcie *pcie)
> +{
> +       struct hw_pci hw;
> +
> +       memset(&hw, 0, sizeof(hw));
> +
> +       hw.nr_controllers = 1;
> +       hw.private_data   = (void **)&pcie;
> +       hw.setup          = mvebu_pcie_setup;
> +       hw.map_irq        = mvebu_pcie_map_irq;
> +       hw.align_resource = mvebu_pcie_align_resource;
> +       hw.ops            = &mvebu_pcie_ops;
> +
> +       pci_common_init(&hw);
> +
> +       return mvebu_pcie_window_config(pcie);
> +}
> +
> +static int __init mvebu_pcie_probe(struct platform_device *pdev)
> +{
> +       struct mvebu_pcie *pcie;
> +       struct device_node *np = pdev->dev.of_node;
> +       struct device_node *child;
> +       const __be32 *range = NULL;
> +       struct resource res;
> +       int i, ret;
> +
> +       pcie = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_pcie),
> +                           GFP_KERNEL);
> +       if (!pcie)
> +               return -ENOMEM;
> +
> +       pcie->pdev = pdev;
> +
> +       pci_sw_host_bridge_init(&pcie->bridge);
> +       pcie->bridge.vendor = PCI_VENDOR_ID_MARVELL;
> +       pcie->bridge.device = MARVELL_EMULATED_HOST_BRIDGE_ID;
> +
> +       /* Get the I/O and memory ranges from DT */
> +       while ((range = of_pci_process_ranges(np, &res, range)) != NULL) {
> +               if (resource_type(&res) == IORESOURCE_IO) {
> +                       memcpy(&pcie->io, &res, sizeof(res));
> +                       pcie->io.name = "I/O";
> +               }
> +               if (resource_type(&res) == IORESOURCE_MEM) {
> +                       memcpy(&pcie->mem, &res, sizeof(res));
> +                       pcie->mem.name = "MEM";
> +               }
> +       }
> +
> +       /* Get the bus range */
> +       ret = of_pci_parse_bus_range(np, &pcie->busn);
> +       if (ret) {
> +               dev_err(&pdev->dev, "failed to parse bus-range property: %d\n",
> +                       ret);
> +               return ret;
> +       }
> +
> +       for_each_child_of_node(pdev->dev.of_node, child) {
> +               if (!of_device_is_available(child))
> +                       continue;
> +               pcie->nports++;
> +       }
> +
> +       pcie->ports = devm_kzalloc(&pdev->dev, pcie->nports *
> +                                  sizeof(struct mvebu_pcie_port),
> +                                  GFP_KERNEL);
> +       if (!pcie->ports)
> +               return -ENOMEM;
> +
> +       i = 0;
> +       for_each_child_of_node(pdev->dev.of_node, child) {
> +               struct mvebu_pcie_port *port = &pcie->ports[i];
> +
> +               if (!of_device_is_available(child))
> +                       continue;
> +
> +               if (of_property_read_u32(child, "marvell,pcie-port",
> +                                        &port->port)) {
> +                       dev_warn(&pdev->dev,
> +                                "ignoring PCIe DT node, missing pcie-port property\n");
> +                       continue;
> +               }
> +
> +               if (of_property_read_u32(child, "marvell,pcie-lane",
> +                                        &port->lane))
> +                       port->lane = 0;
> +
> +               port->devfn = of_pci_get_devfn(child);
> +               if (port->devfn < 0)
> +                       continue;
> +
> +               port->base = of_iomap(child, 0);
> +               if (!port->base) {
> +                       dev_err(&pdev->dev, "PCIe%d.%d: cannot map registers\n",
> +                               port->port, port->lane);
> +                       continue;
> +               }
> +
> +               if (orion_pcie_link_up(port->base)) {
> +                       port->haslink = 1;
> +                       dev_info(&pdev->dev, "PCIe%d.%d: link up\n",
> +                                port->port, port->lane);
> +               } else {
> +                       port->haslink = 0;
> +                       dev_info(&pdev->dev, "PCIe%d.%d: link down\n",
> +                                port->port, port->lane);
> +               }
> +
> +               port->clk = of_clk_get_by_name(child, NULL);
> +               if (!port->clk) {
> +                       dev_err(&pdev->dev, "PCIe%d.%d: cannot get clock\n",
> +                              port->port, port->lane);
> +                       iounmap(port->base);
> +                       port->haslink = 0;
> +                       continue;
> +               }
> +
> +               port->dn = child;
> +
> +               clk_prepare_enable(port->clk);
> +               spin_lock_init(&port->conf_lock);
> +
> +               pci_sw_pci_bridge_init(&port->bridge);
> +               port->bridge.vendor = PCI_VENDOR_ID_MARVELL;
> +               port->bridge.device = MARVELL_EMULATED_PCI_PCI_BRIDGE_ID;
> +               port->bridge.primary_bus = 0;
> +               port->bridge.secondary_bus = PCI_SLOT(port->devfn);
> +               port->bridge.subordinate_bus = PCI_SLOT(port->devfn);
> +
> +               i++;
> +       }
> +
> +       mvebu_pcie_enable(pcie);
> +
> +       return 0;
> +}
> +
> +static const struct of_device_id mvebu_pcie_of_match_table[] = {
> +       { .compatible = "marvell,armada-370-xp-pcie", },
> +       {},
> +};
> +MODULE_DEVICE_TABLE(of, mvebu_pcie_of_match_table);
> +
> +static struct platform_driver mvebu_pcie_driver = {
> +       .driver = {
> +               .owner = THIS_MODULE,
> +               .name = "mvebu-pcie",
> +               .of_match_table =
> +                  of_match_ptr(mvebu_pcie_of_match_table),
> +       },
> +};
> +
> +static int mvebu_pcie_init(void)
> +{
> +       return platform_driver_probe(&mvebu_pcie_driver,
> +                                    mvebu_pcie_probe);
> +}
> +
> +subsys_initcall(mvebu_pcie_init);
> +
> +MODULE_AUTHOR("Thomas Petazzoni <thomas.petazzoni@free-electrons.com>");
> +MODULE_DESCRIPTION("Marvell EBU PCIe driver");
> +MODULE_LICENSE("GPL");
> --
> 1.7.9.5
>
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29  3:29   ` Bjorn Helgaas
@ 2013-01-29  5:55     ` Jason Gunthorpe
  2013-01-29  8:00       ` Thomas Petazzoni
  2013-01-29 17:47       ` Bjorn Helgaas
  0 siblings, 2 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-29  5:55 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 08:29:24PM -0700, Bjorn Helgaas wrote:
> On Mon, Jan 28, 2013 at 11:56 AM, Thomas Petazzoni
> <thomas.petazzoni@free-electrons.com> wrote:
> > This driver implements the support for the PCIe interfaces on the
> > Marvell Armada 370/XP ARM SoCs. In the future, it might be extended to
> > cover earlier families of Marvell SoCs, such as Dove, Orion and
> > Kirkwood.
> >
> > The driver implements the hw_pci operations needed by the core ARM PCI
> > code to setup PCI devices and get their corresponding IRQs, and the
> > pci_ops operations that are used by the PCI core to read/write the
> > configuration space of PCI devices.
> >
> > Since the PCIe interfaces of Marvell SoCs are completely separate and
> > not linked together in a bus, this driver sets up an emulated PCI host
> > bridge, with one PCI-to-PCI bridge as child for each hardware PCIe
> > interface.
> 
> There's no Linux requirement that multiple PCIe interfaces appear to
> be in the same hierarchy.  You can just use pci_scan_root_bus()
> separately on each interface.  Each interface can be in its own domain
> if necessary.
What you suggest is basically what the Marvell driver did originally,
the problem is that Linux requires a pre-assigned aperture for each
PCI domain/root bus, and these new chips have so many PCI-E ports that
they can exhaust the physical address space, and also a limited
internal HW resource for setting address routing.
Thus they require resource allocation that is sensitive to the devices
present downstream.
By far the simplest solution is to merge all the physical links into a
single domain and rely on existing PCI resource allocation code to
drive allocation of scarce physical address space and demand allocate
the HW routing resource (specifically there are enough resources to
accommodate MMIO only devices on every bus, but not enough to
accommodate MMIO and IO on every bus).
> > +/*
> > + * For a given PCIe interface (represented by a mvebu_pcie_port
> > + * structure), we read the PCI configuration space of the
> > + * corresponding PCI-to-PCI bridge in order to find out which range of
> > + * I/O addresses and memory addresses have been assigned to this PCIe
> > + * interface. Using these informations, we set up the appropriate
> > + * address decoding windows so that the physical address are actually
> > + * resolved to the right PCIe interface.
> > + */
> 
> Are you inferring the host bridge apertures by using the resources
> assigned to devices under the bridge, i.e., taking the union of all
The flow is different, a portion of physical address space is set
aside for use by PCI-E (via DT) and that portion is specified in the
struct resource's ultimately attached to the PCI domain for the bus
scan. You could call that the 'host bridge aperture' though it doesn't
reflect any HW configuration at all. The values come from the device
tree.
During the bus scan the Linux core code splits up that contiguous
space and assigns to the PCI-PCI bridges and devices under that domain.
Each physical PCI-E link on the chip is seen by Linux through the SW
emulated PCI-PCI bridge attached to bus 0. When Linux configures the
bridge windows it triggers this code here to copy that window
information from the PCI config space into non-standard internal HW
registers.
The purpose of the SW PCI-PCI bridge and this code here is to give
the Linux PCI core control over the window (MMIO,IO,busnr) assigned
to the PCI-E link.
This arrangement with PCI-PCI bridges controlling address routing is
part of the PCI-E standard, in this instance Marvell did not implement
the required config space in HW so the driver is working around that
deficiency.
Other drivers, like tegra have a similar design, but their hardware
does implement PCI-PCI bridge configuration space and does drive
address decoding through the HW PCI-PCI window registers.
Having PCI-E links be bridges, not domains/root_bus's is in-line with
the standard and works better with the Linux PCI resource allocator.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29  5:55     ` Jason Gunthorpe
@ 2013-01-29  8:00       ` Thomas Petazzoni
  2013-01-29 17:47       ` Bjorn Helgaas
  1 sibling, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29  8:00 UTC (permalink / raw)
  To: linux-arm-kernel
Bjorn, Jason,
On Mon, 28 Jan 2013 22:55:08 -0700, Jason Gunthorpe wrote:
> > There's no Linux requirement that multiple PCIe interfaces appear to
> > be in the same hierarchy.  You can just use pci_scan_root_bus()
> > separately on each interface.  Each interface can be in its own domain
> > if necessary.
> 
> What you suggest is basically what the Marvell driver did originally,
> the probelm is that Linux requires a pre-assigned aperture for each
> PCI domain/root bus, and these new chips have so many PCI-E ports that
> they can exhaust the physical address space, and also a limited
> internal HW resource for setting address routing.
> 
> Thus they require resource allocation that is sensitive to the devices
> present downstream.
> 
> By far the simplest solution is to merge all the physical links into a
> single domain and rely on existing PCI resource allocation code to
> drive allocation of scarce physical address space and demand allocate
> the HW routing resource (specifically there are enough resources to
> accomidate MMIO only devices on every bus, but not enough to
> accomidate MMIO and IO on every bus).
> 
> > > +/*
> > > + * For a given PCIe interface (represented by a mvebu_pcie_port
> > > + * structure), we read the PCI configuration space of the
> > > + * corresponding PCI-to-PCI bridge in order to find out which range of
> > > + * I/O addresses and memory addresses have been assigned to this PCIe
> > > + * interface. Using these informations, we set up the appropriate
> > > + * address decoding windows so that the physical address are actually
> > > + * resolved to the right PCIe interface.
> > > + */
> > 
> > Are you inferring the host bridge apertures by using the resources
> > assigned to devices under the bridge, i.e., taking the union of all
> 
> The flow is different, a portion of physical address space is set
> aside for use by PCI-E (via DT) and that portion is specified in the
> struct resource's ultimately attached to the PCI domain for the bus
> scan. You could call that the 'host bridge aperture' though it doesn't
> reflect any HW configuration at all. The values come from the device
> tree.
> 
> During the bus scan the Linux core code splits up that contiguous
> space and assigns to the PCI-PCI bridges and devices under that domain.
> 
> Each physical PCI-E link on the chip is seen by Linux through the SW
> emulated PCI-PCI bridge attached to bus 0. When Linux configures the
> bridge windows it triggers this code here to copy that window
> information from the PCI config space into non-standard internal HW
> registers.
> 
> The purpose of the SW PCI-PCI bridge and this code here is to give
> the Linux PCI core control over the window (MMIO,IO,busnr) assigned
> to the PCI-E link.
> 
> This arrangement with PCI-PCI bridges controlling address routing is
> part of the PCI-E standard, in this instance Marvell did not implement
> the required config space in HW so the driver is working around that
> deficiency.
> 
> Other drivers, like tegra have a similar design, but their hardware
> does implement PCI-PCI bridge configuration space and does drive
> address decoding through the HW PCI-PCI window registers.
> 
> Having PCI-E links be bridges, not domains/root_bus's is in-line with
> the standard and works better with the Linux PCI resource allocator.
Thanks a lot Jason for this explanation, I couldn't have explained it
as clearly as you did.
Bjorn, does Jason's reply answer your questions? Or do you need other
details?
Thanks!
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29  5:55     ` Jason Gunthorpe
  2013-01-29  8:00       ` Thomas Petazzoni
@ 2013-01-29 17:47       ` Bjorn Helgaas
  2013-01-29 18:14         ` Thomas Petazzoni
  2013-01-29 18:41         ` Jason Gunthorpe
  1 sibling, 2 replies; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-29 17:47 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 10:55 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Mon, Jan 28, 2013 at 08:29:24PM -0700, Bjorn Helgaas wrote:
>> On Mon, Jan 28, 2013 at 11:56 AM, Thomas Petazzoni
>> <thomas.petazzoni@free-electrons.com> wrote:
>> > This driver implements the support for the PCIe interfaces on the
>> > Marvell Armada 370/XP ARM SoCs. In the future, it might be extended to
>> > cover earlier families of Marvell SoCs, such as Dove, Orion and
>> > Kirkwood.
>> >
>> > The driver implements the hw_pci operations needed by the core ARM PCI
>> > code to setup PCI devices and get their corresponding IRQs, and the
>> > pci_ops operations that are used by the PCI core to read/write the
>> > configuration space of PCI devices.
>> >
>> > Since the PCIe interfaces of Marvell SoCs are completely separate and
>> > not linked together in a bus, this driver sets up an emulated PCI host
>> > bridge, with one PCI-to-PCI bridge as child for each hardware PCIe
>> > interface.
>>
>> There's no Linux requirement that multiple PCIe interfaces appear to
>> be in the same hierarchy.  You can just use pci_scan_root_bus()
>> separately on each interface.  Each interface can be in its own domain
>> if necessary.
>
> What you suggest is basically what the Marvell driver did originally,
> the probelm is that Linux requires a pre-assigned aperture for each
> PCI domain/root bus, and these new chips have so many PCI-E ports that
> they can exhaust the physical address space, and also a limited
> internal HW resource for setting address routing.
>
> Thus they require resource allocation that is sensitive to the devices
> present downstream.
>
> By far the simplest solution is to merge all the physical links into a
> single domain and rely on existing PCI resource allocation code to
> drive allocation of scarce physical address space and demand allocate
> the HW routing resource (specifically there are enough resources to
> accomidate MMIO only devices on every bus, but not enough to
> accomidate MMIO and IO on every bus).
>
>> > +/*
>> > + * For a given PCIe interface (represented by a mvebu_pcie_port
>> > + * structure), we read the PCI configuration space of the
>> > + * corresponding PCI-to-PCI bridge in order to find out which range of
>> > + * I/O addresses and memory addresses have been assigned to this PCIe
>> > + * interface. Using these informations, we set up the appropriate
>> > + * address decoding windows so that the physical address are actually
>> > + * resolved to the right PCIe interface.
>> > + */
>>
>> Are you inferring the host bridge apertures by using the resources
>> assigned to devices under the bridge, i.e., taking the union of all
>
> The flow is different, a portion of physical address space is set
> aside for use by PCI-E (via DT) and that portion is specified in the
> struct resource's ultimately attached to the PCI domain for the bus
> scan. You could call that the 'host bridge aperture' though it doesn't
> reflect any HW configuration at all. The values come from the device
> tree.
I think I would understand this better if we had a concrete example to
talk about, say a dmesg log and corresponding lspci -v output.
As I understand it, the DT is a description of the hardware, so in
that sense, the DT can't set aside physical address space.  It can
describe what the hardware does with the address space, and I assume
that's what you mean.  Maybe the hardware isn't configurable, e.g., it
is hard-wired to route certain address ranges to PCIe?
> During the bus scan the Linux core code splits up that contiguous
> space and assigns to the PCI-PCI bridges and devices under that domain.
>
> Each physical PCI-E link on the chip is seen by Linux through the SW
> emulated PCI-PCI bridge attached to bus 0. When Linux configures the
> bridge windows it triggers this code here to copy that window
> information from the PCI config space into non-standard internal HW
> registers.
>
> The purpose of the SW PCI-PCI bridge and this code here is to give
> the Linux PCI core control over the window (MMIO,IO,busnr) assigned
> to the PCI-E link.
>
> This arrangement with PCI-PCI bridges controlling address routing is
> part of the PCI-E standard, in this instance Marvell did not implement
> the required config space in HW so the driver is working around that
> deficiency.
>
> Other drivers, like tegra have a similar design, but their hardware
> does implement PCI-PCI bridge configuration space and does drive
> address decoding through the HW PCI-PCI window registers.
>
> Having PCI-E links be bridges, not domains/root_bus's is in-line with
> the standard and works better with the Linux PCI resource allocator.
>
> Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 17:47       ` Bjorn Helgaas
@ 2013-01-29 18:14         ` Thomas Petazzoni
  2013-01-29 18:41         ` Jason Gunthorpe
  1 sibling, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 18:14 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Bjorn Helgaas,
On Tue, 29 Jan 2013 10:47:09 -0700, Bjorn Helgaas wrote:
> I think I would understand this better if we had a concrete example to
> talk about, say a dmesg log and corresponding lspci -v output.
Please note that the cover letter of this patch series has the lspci
-vvv output.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 17:47       ` Bjorn Helgaas
  2013-01-29 18:14         ` Thomas Petazzoni
@ 2013-01-29 18:41         ` Jason Gunthorpe
  2013-01-29 19:07           ` Bjorn Helgaas
  1 sibling, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-29 18:41 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 10:47:09AM -0700, Bjorn Helgaas wrote:
> As I understand it, the DT is a description of the hardware, so in
> that sense, the DT can't set aside physical address space.  It can
> describe what the hardware does with the address space, and I assume
> that's what you mean.  Maybe the hardware isn't configurable, e.g., it
> is hard-wired to route certain address ranges to PCIe?
The DT is largely a description of the hardware, but when it comes to
addresses, particularly HW programmable addresses, there is a general
expectation that the driver/bootloader will program HW address
decoders to either match the addresses given in the DT, or to new
values guided by the DT addresses.
In a real sense that means the DT also describes the physical address
map the kernel should use.
In the PCI-E case the DT PCI-E HW description includes physical
address ranges to use for the MMIO/IO/PREFETCH PCI-E interface windows
and the driver is expected to program the internal HW address decoders
based on those address ranges.
The catch is that the hardware decoders are on a link-by-link basis,
not on a root-complex basis, so the programming can only take place
once the Linux kernel has done PCI resource assignment.
So when I say set aside, I mean for instance, the PCI-E entry in DT
has 128M of physical address space marked for PCI MMIO use. The kernel
does PCI resource allocation and the HW decoders in each link will be
set to claim some portion of the 128M - based on the MMIO windows
programmed on the PCI-PCI root port bridges. The remaining part of the
128M is dead address space, not claimed by any hardware block at all.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 18:41         ` Jason Gunthorpe
@ 2013-01-29 19:07           ` Bjorn Helgaas
  2013-01-29 19:18             ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-29 19:07 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 11:41 AM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Tue, Jan 29, 2013 at 10:47:09AM -0700, Bjorn Helgaas wrote:
>
>> As I understand it, the DT is a description of the hardware, so in
>> that sense, the DT can't set aside physical address space.  It can
>> describe what the hardware does with the address space, and I assume
>> that's what you mean.  Maybe the hardware isn't configurable, e.g., it
>> is hard-wired to route certain address ranges to PCIe?
>
> The DT is largely a description of the hardware, but when it comes to
> addresses, particularly HW programmable addresess, there is an general
> expectation that the driver/bootloader will program HW address
> decoders to either match the addresses given in the DT, or to new
> values guided by the DT addresses.
>
> In a real sense that means the DT also describes the physical address
> map the kernel should use.
>
> In the PCI-E case the DT PCI-E HW description includes physical
> address ranges to use for the MMIO/IO/PREFETCH PCI-E interface windows
> and the driver is expected to program the internal HW address decoders
> based on those address ranges.
>
> The catch is that the hardware decoders are on a link-by-link basis,
> not on a root-complex basis, so the programming can only take place
> once the Linux kernel has done PCI resource assignment.
>
> So when I say set aside, I mean for instance, the PCI-E entry in DT
> has 128M of physical address space marked for PCI MMIO use. The kernel
> does PCI resource allocation and the HW decoders in each link will be
> set to claim some portion of the 128M - based on the MMIO windows
> programmed on the PCI-PCI root port bridges. The reamining part of the
> 128M is dead address space, not claimed by any hardware block at all.
Thanks, this really helps get to the issue that the PCI core will care
about.  The root ports look like normal bridges, so the core assumes
it can manage their windows as needed, as long as the windows stay
inside the host bridge apertures that are logically upstream from the
root ports.
In your example, it sounds like the 128M should be treated as the host
bridge aperture.  Is there any reason not to do that?  It sounds like
there's no place you can actually program that 128M region into the
hardware, and you would just program pieces of that region as root
port windows.  But that should be OK from the core's perspective.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 19:07           ` Bjorn Helgaas
@ 2013-01-29 19:18             ` Jason Gunthorpe
  2013-01-29 19:38               ` Bjorn Helgaas
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-29 19:18 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 12:07:00PM -0700, Bjorn Helgaas wrote:
> > So when I say set aside, I mean for instance, the PCI-E entry in DT
> > has 128M of physical address space marked for PCI MMIO use. The kernel
> > does PCI resource allocation and the HW decoders in each link will be
> > set to claim some portion of the 128M - based on the MMIO windows
> > programmed on the PCI-PCI root port bridges. The reamining part of the
> > 128M is dead address space, not claimed by any hardware block at all.
> 
> Thanks, this really helps get to the issue that the PCI core will care
> about.  The root ports look like normal bridges, so the core assumes
> it can manage their windows as needed, as long as the windows stay
> inside the host bridge apertures that are logically upstream from the
> root ports.
Yes, that is basically correct. This is what the PCI-E specification
says the root complex/root port should look like and this is what some
SOC hardware implements fully in hardware. The small wrinkle with
Marvell is that the PCI-PCI bridge config space is created by the
driver since the HW does not expose a standard config space.
> In your example, it sounds like the 128M should be treated as the host
> bridge aperture.  Is there any reason not to do that?  It sounds like
> there's no place you can actually program that 128M region into the
> hardware, and you would just program pieces of that region as root
> port windows.  But that should be OK from the core's perspective.
AFAIK this is already what Thomas's driver is doing..
Regards,
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 19:18             ` Jason Gunthorpe
@ 2013-01-29 19:38               ` Bjorn Helgaas
  2013-01-29 22:27                 ` Bjorn Helgaas
  0 siblings, 1 reply; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-29 19:38 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 12:18 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Tue, Jan 29, 2013 at 12:07:00PM -0700, Bjorn Helgaas wrote:
>> > So when I say set aside, I mean for instance, the PCI-E entry in DT
>> > has 128M of physical address space marked for PCI MMIO use. The kernel
>> > does PCI resource allocation and the HW decoders in each link will be
>> > set to claim some portion of the 128M - based on the MMIO windows
>> > programmed on the PCI-PCI root port bridges. The reamining part of the
>> > 128M is dead address space, not claimed by any hardware block at all.
>>
>> Thanks, this really helps get to the issue that the PCI core will care
>> about.  The root ports look like normal bridges, so the core assumes
>> it can manage their windows as needed, as long as the windows stay
>> inside the host bridge apertures that are logically upstream from the
>> root ports.
>
> Yes, that is basically correct. This is what the PCI-E specification
> says the root complex/root port should look like and this is what some
> SOC hardware implements fully in hardware. The small wrinkle with
> Marvell is that the PCI-PCI bridge config space is created by the
> driver since the HW does not expose a standard config space.
Oh, so the actual *root port* itself doesn't conform to the spec?
Wow, that's worse than I expected.
Then I guess you have to emulate it and make sure its config space is
complete enough and functional enough so that all the link management,
power management, AER, etc., code in the core works as well as it
would with a conforming device.
>> In your example, it sounds like the 128M should be treated as the host
>> bridge aperture.  Is there any reason not to do that?  It sounds like
>> there's no place you can actually program that 128M region into the
>> hardware, and you would just program pieces of that region as root
>> port windows.  But that should be OK from the core's perspective.
>
> AFAIK this is already what Thomas's driver is doing..
>
> Regards,
> Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 19:38               ` Bjorn Helgaas
@ 2013-01-29 22:27                 ` Bjorn Helgaas
  2013-01-30  4:24                   ` Jason Gunthorpe
  2013-01-30  9:35                   ` Thomas Petazzoni
  0 siblings, 2 replies; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-29 22:27 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 12:38 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
> On Tue, Jan 29, 2013 at 12:18 PM, Jason Gunthorpe
> <jgunthorpe@obsidianresearch.com> wrote:
>> On Tue, Jan 29, 2013 at 12:07:00PM -0700, Bjorn Helgaas wrote:
>>> > So when I say set aside, I mean for instance, the PCI-E entry in DT
>>> > has 128M of physical address space marked for PCI MMIO use. The kernel
>>> > does PCI resource allocation and the HW decoders in each link will be
>>> > set to claim some portion of the 128M - based on the MMIO windows
>>> > programmed on the PCI-PCI root port bridges. The remaining part of the
>>> > 128M is dead address space, not claimed by any hardware block at all.
>>>
>>> Thanks, this really helps get to the issue that the PCI core will care
>>> about.  The root ports look like normal bridges, so the core assumes
>>> it can manage their windows as needed, as long as the windows stay
>>> inside the host bridge apertures that are logically upstream from the
>>> root ports.
>>
>> Yes, that is basically correct. This is what the PCI-E specification
>> says the root complex/root port should look like and this is what some
>> SOC hardware implements fully in hardware. The small wrinkle with
>> Marvell is that the PCI-PCI bridge config space is created by the
>> driver since the HW does not expose a standard config space.
>
> Oh, so the actual *root port* itself doesn't conform to the spec?
> Wow, that's worse than I expected.
>
> Then I guess you have to emulate it and make sure its config space is
> complete enough and functional enough so that all the link management,
> power management, AER, etc., code in the core works as well as it
> would with a conforming device.
I'm not sure the existing emulation in these patches is sufficient.
For example, pci_sw_pci_bridge_write() updates bridge->membase when we
write to the window register, but I don't see anything that updates
the actual hardware decoder.  That might be done in
mvebu_pcie_window_config_port() via armada_370_xp_alloc_pcie_window(),
but that looks like it's only done once.  If the PCI core updates a
root port window later, I don't see where the hardware decoder will be
updated.
Maybe you're counting on the window assignments to be static?  The PCI
core doesn't guarantee anything like that, though in the absence of
hotplug I don't know any reason why it would change things.
I also forgot about the bus number munging in mvebu_pcie_rd_conf().
The PCI core can update the bridge secondary/subordinate registers.
It looks like you don't support writing to them, and the read path
(pci_sw_pci_bridge_read()) looks like it doesn't do any translation
between the hardware and Linux bus numbers.  I don't understand the
system well enough to know if this is an issue.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 22:27                 ` Bjorn Helgaas
@ 2013-01-30  4:24                   ` Jason Gunthorpe
  2013-01-30  9:35                   ` Thomas Petazzoni
  1 sibling, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-30  4:24 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 03:27:43PM -0700, Bjorn Helgaas wrote:
> I'm not sure the existing emulation in these patches is sufficient.
> For example, pci_sw_pci_bridge_write() updates bridge->membase when we
> write to the window register, but I don't see anything that updates
> the actual hardware decoder.  That might be done in
> mvebu_pcie_window_config_port() via armada_370_xp_alloc_pcie_window(),
> but that looks like it's only done once.  If the PCI core updates a
> root port window later, I don't see where the hardware decoder will be
> updated.
> 
> Maybe you're counting on the window assignments to be static?  The PCI
> core doesn't guarantee anything like that, though in the absence of
> hotplug I don't know any reason why it would change things.
 
Agree..
Thomas, I think you need to directly update the Marvell hardware
registers when config writes are made to the SW bridge. If this means
it is too hard/complex to keep the code general then I'd say make it
part of the Marvell host driver.
> I also forgot about the bus number munging in mvebu_pcie_rd_conf().
> The PCI core can update the bridge secondary/subordinate registers.
> It looks like you don't support writing to them, and the read path
> (pci_sw_pci_bridge_read()) looks like it doesn't do any translation
> between the hardware and Linux bus numbers.  I don't understand the
> system well enough to know if this is an issue.
I was chatting with Thomas on this subject, it looks like there is a
HW register that needs to be set to the subordinate bus number of the
bridge, that will solve this weirdness.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 22:27                 ` Bjorn Helgaas
  2013-01-30  4:24                   ` Jason Gunthorpe
@ 2013-01-30  9:35                   ` Thomas Petazzoni
  2013-01-30 18:52                     ` Bjorn Helgaas
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-30  9:35 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Bjorn Helgaas,
On Tue, 29 Jan 2013 15:27:43 -0700, Bjorn Helgaas wrote:
> I'm not sure the existing emulation in these patches is sufficient.
> For example, pci_sw_pci_bridge_write() updates bridge->membase when we
> write to the window register, but I don't see anything that updates
> the actual hardware decoder.  That might be done in
> mvebu_pcie_window_config_port() via armada_370_xp_alloc_pcie_window(),
> but that looks like it's only done once.
That's correct. I currently let the Linux PCI core enumerate the
real PCIe devices, allocate the resources, and set the appropriate
values in the emulated PCI-to-PCI bridge registers. Once this is all
done, the Marvell PCIe driver looks at each PCI-to-PCI bridge, reads
the membase and iobase registers, and creates address decoding windows
so that the physical addresses assigned by the Linux PCI core actually
resolve to the right PCIe interface. This is done once for all.
> If the PCI core updates a root port window later, I don't see where the hardware
> decoder will be updated.
It will not be updated.
> Maybe you're counting on the window assignments to be static?  The PCI
> core doesn't guarantee anything like that, though in the absence of
> hotplug I don't know any reason why it would change things.
Right. Is supporting hotplug a show-stopper to get this included? I
think it could be added later, if it happens to be needed, no?
I could of course do it, but the patch series is already quite large
and complicated, so if we could merge a simple, but working, version
first, and then improve on top of it when needed, it would be nice.
> I also forgot about the bus number munging in mvebu_pcie_rd_conf().
> The PCI core can update the bridge secondary/subordinate registers.
> It looks like you don't support writing to them, and the read path
> (pci_sw_pci_bridge_read()) looks like it doesn't do any translation
> between the hardware and Linux bus numbers.  I don't understand the
> system well enough to know if this is an issue.
Right. Could you explain a little bit for what reasons the PCI core
could update the secondary/subordinate registers, and to what values it
sets them?
For now, I statically assign the secondary bus register value to be
X+1, where X is the number of the PCIe interface, since X=0 is reserved
for the root bus (which has the host bridge and the PCI-to-PCI
bridges).
Also, could you detail what kind of translation I should be doing when
reading the hardware and Linux bus numbers?
I apologize for asking so many, probably silly, questions, but I am
still learning all those internal PCI mechanisms.
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30  9:35                   ` Thomas Petazzoni
@ 2013-01-30 18:52                     ` Bjorn Helgaas
  2013-01-30 22:28                       ` Thomas Petazzoni
  2013-01-31 16:04                       ` Thomas Petazzoni
  0 siblings, 2 replies; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-30 18:52 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Jan 30, 2013 at 2:35 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Dear Bjorn Helgaas,
>
> On Tue, 29 Jan 2013 15:27:43 -0700, Bjorn Helgaas wrote:
>
>> I'm not sure the existing emulation in these patches is sufficient.
>> For example, pci_sw_pci_bridge_write() updates bridge->membase when we
>> write to the window register, but I don't see anything that updates
>> the actual hardware decoder.  That might be done in
>> mvebu_pcie_window_config_port() via armada_370_xp_alloc_pcie_window(),
>> but that looks like it's only done once.
>
> That's correct. I currently let the Linux PCI core enumerate the
> real PCIe devices, allocate the resources, and set the appropriate
> values in the emulated PCI-to-PCI bridge registers. Once this is all
> done, the Marvell PCIe driver looks at each PCI-to-PCI bridge, reads
> the membase and iobase registers, and creates address decoding windows
> so that the physical addresses assigned by the Linux PCI core actually
> resolve to the right PCIe interface. This is done once for all.
>
>> If the PCI core updates a root port window later, I don't see where the hardware
>> decoder will be updated.
>
> It will not be updated.
>
>> Maybe you're counting on the window assignments to be static?  The PCI
>> core doesn't guarantee anything like that, though in the absence of
>> hotplug I don't know any reason why it would change things.
>
> Right. Is supporting hotplug a show-stopper to get this included? I
> think it could be added later, if it happens to be needed, no?
>
> I could of course do it, but the patch series is already quite large
> and complicated, so if we could merge a simple, but working, version
> first, and then improve on top of it when needed, it would be nice.
I'm most concerned about the stuff in drivers/pci.  I hesitate to
merge drivers/pci/sw-pci-pci-bridge.c as-is because it's a model
that's not connected to hardware and only works in a completely static
situation, and the rest of the PCI core can't really deal with that.
But I don't think supporting hotplug should be a show-stopper at this
point, either.  It sounds like we might be heading towards hooking
this up more directly to the Marvell hardware, which will make it more
arch-dependent.  Something like that could either go in arch/arm, or
in some not-quite-so-generic spot under drivers/pci.
>> I also forgot about the bus number munging in mvebu_pcie_rd_conf().
>> The PCI core can update the bridge secondary/subordinate registers.
>> It looks like you don't support writing to them, and the read path
>> (pci_sw_pci_bridge_read()) looks like it doesn't do any translation
>> between the hardware and Linux bus numbers.  I don't understand the
>> system well enough to know if this is an issue.
>
> Right. Could you explain a little bit for what reasons the PCI core
> could update the secondary/subordinate registers, and to what values it
> sets them?
The secondary/subordinate registers effectively define a bus number
aperture that tells the bridge which transactions to claim and forward
downstream.  When enumerating devices, we may update the subordinate
bus number to widen the aperture so we can enumerate an arbitrary tree
behind the bridge.  When we're finished, we'll probably narrow it by
updating the subordinate again, so the unused bus number space can be
used for other bridges.  I don't know the exact details of the
algorithm, and they're likely to change anyway, but pci_scan_bridge()
is where most of it happens.
It looks like your current system doesn't support trees below the
bridges, but hopefully we can make it so the generic enumeration
algorithms still work.
> For now, I statically assign the secondary bus register value to be
> X+1, where X is the number of the PCIe interface, since X=0 is reserved
> for the root bus (which has the host bridge and the PCI-to-PCI
> bridges).
That makes sense but limits you to a single bus (and really, a single
device since this is PCIe) below the bridge.
> Also, could you detail what kind of translation I should be doing when
> reading the hardware and Linux bus numbers?
I'm hoping that the register Jason mentioned is enough to avoid the
need for translation.  If it's not, we can explore this a bit more.
Bjorn
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 18:52                     ` Bjorn Helgaas
@ 2013-01-30 22:28                       ` Thomas Petazzoni
  2013-01-30 23:10                         ` Jason Gunthorpe
  2013-01-30 23:48                         ` Bjorn Helgaas
  2013-01-31 16:04                       ` Thomas Petazzoni
  1 sibling, 2 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-30 22:28 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Bjorn Helgaas,
On Wed, 30 Jan 2013 11:52:15 -0700, Bjorn Helgaas wrote:
> I'm most concerned about the stuff in drivers/pci.  I hesitate to
> merge drivers/pci/sw-pci-pci-bridge.c as-is because it's a model
> that's not connected to hardware and only works in a completely static
> situation, and the rest of the PCI core can't really deal with that.
> 
> But I don't think supporting hotplug should be a show-stopper at this
> point, either.  It sounds like we might be heading towards hooking
> this up more directly to the Marvell hardware, which will make it more
> arch-dependent.  Something like that could either go in arch/arm, or
> in some not-quite-so-generic spot under drivers/pci.
If you really don't want sw-pci-pci-bridge.c in drivers/pci, then I can
make it a part of the drivers/pci/host/pci-mvebu.c driver itself. I
initially followed the idea started by Thierry Reding for the emulated
host bridge, but if you feel that this emulated PCI-to-PCI bridge is
too specific to this driver, then I'm fine with keeping it inside the
driver itself.
> >> I also forgot about the bus number munging in mvebu_pcie_rd_conf().
> >> The PCI core can update the bridge secondary/subordinate registers.
> >> It looks like you don't support writing to them, and the read path
> >> (pci_sw_pci_bridge_read()) looks like it doesn't do any translation
> >> between the hardware and Linux bus numbers.  I don't understand the
> >> system well enough to know if this is an issue.
> >
> > Right. Could you explain a little bit for what reasons the PCI core
> > could update the secondary/subordinate registers, and to what
> > values it sets them?
> 
> The secondary/subordinate registers effectively define a bus number
> aperture that tells the bridge which transactions to claim and forward
> downstream.  When enumerating devices, we may update the subordinate
> bus number to widen the aperture so we can enumerate an arbitrary tree
> behind the bridge.  When we're finished, we'll probably narrow it by
> updating the subordinate again, so the unused bus number space can be
> used for other bridges.  I don't know the exact details of the
> algorithm, and they're likely to change anyway, but pci_scan_bridge()
> is where most of it happens.
> 
> It looks like your current system doesn't support trees below the
> bridges, but hopefully we can make it so the generic enumeration
> algorithms still work.
In practice, in our situation, there isn't a tree below the bridge.
There is one single device. I'd prefer to not implement features that I
cannot effectively test, and let the implementation of those additional
features to whoever will need them, and therefore be able to test them.
I guess that if I integrate the PCI-to-PCI bridge emulation code within
the Marvell driver, then I can keep it fairly limited to whatever the
Marvell PCI driver requires, no?
> > For now, I statically assign the secondary bus register value to be
> > X+1, where X is the number of the PCIe interface, since X=0 is
> > reserved for the root bus (which has the host bridge and the
> > PCI-to-PCI bridges).
> 
> That makes sense but limits you to a single bus (and really, a single
> device since this is PCIe) below the bridge.
Which is exactly what is happening here.
> > Also, could you detail what kind of translation I should be doing
> > when reading the hardware and Linux bus numbers?
> 
> I'm hoping that the register Jason mentioned is enough to avoid the
> need for translation.  If it's not, we can explore this a bit more.
Ok.
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 22:28                       ` Thomas Petazzoni
@ 2013-01-30 23:10                         ` Jason Gunthorpe
  2013-01-30 23:48                         ` Bjorn Helgaas
  1 sibling, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-30 23:10 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Jan 30, 2013 at 11:28:36PM +0100, Thomas Petazzoni wrote:
> > It looks like your current system doesn't support trees below the
> > bridges, but hopefully we can make it so the generic enumeration
> > algorithms still work.
> 
> In practice, in our situation, there isn't a tree below the bridge.
> There is one single device. I'd prefer to not implement features that I
> cannot effectively test, and let the implementation of those additional
> features to whoever will need them, and therefore be able to test
> them.
Agreed it is hard to test, but be aware that any system that has PCI-E
slots can host an add-in card that has a bridge on it. These are mildly
common in some areas like high port count ethernet cards.
If you aren't going to attempt the implementation then a really big
FIXME that the config access routing is not correct and needs to be
based on the bus range assigned to the bridge would be friendly :)
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 22:28                       ` Thomas Petazzoni
  2013-01-30 23:10                         ` Jason Gunthorpe
@ 2013-01-30 23:48                         ` Bjorn Helgaas
  1 sibling, 0 replies; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-30 23:48 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Jan 30, 2013 at 3:28 PM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Dear Bjorn Helgaas,
>
> On Wed, 30 Jan 2013 11:52:15 -0700, Bjorn Helgaas wrote:
>> It looks like your current system doesn't support trees below the
>> bridges, but hopefully we can make it so the generic enumeration
>> algorithms still work.
>
> In practice, in our situation, there isn't a tree below the bridge.
> There is one single device. I'd prefer to not implement features that I
> cannot effectively test, and let the implementation of those additional
> features to whoever will need them, and therefore be able to test them.
I understand the concern about testing, but my advice is to not use
that as an excuse to artificially limit the functionality of the code
you're writing :)
You're talking about emulating a bridge, and the bridge really doesn't
know or care what's downstream.  If it works with a single device
downstream, it should work with another bridge downstream.  Many
aspects of bridge configuration can be tested with creative
application of setpci, so that might be a possibility, too.
> I guess that if I integrate the PCI-to-PCI bridge emulation code within
> the Marvell driver, then I can keep it fairly limited to whatever the
> Marvell PCI driver requires, no?
Yeah, it's just that the pci_dev for this emulated bridge is used by
the generic PCI core code, e.g., pci_scan_bridge(), so whatever
emulation you do has to be robust enough that the core won't notice
it's emulated.
Bjorn
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 18:52                     ` Bjorn Helgaas
  2013-01-30 22:28                       ` Thomas Petazzoni
@ 2013-01-31 16:04                       ` Thomas Petazzoni
  2013-01-31 16:30                         ` Bjorn Helgaas
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-31 16:04 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Bjorn Helgaas,
On Wed, 30 Jan 2013 11:52:15 -0700, Bjorn Helgaas wrote:
> The secondary/subordinate registers effectively define a bus number
> aperture that tells the bridge which transactions to claim and forward
> downstream.  When enumerating devices, we may update the subordinate
> bus number to widen the aperture so we can enumerate an arbitrary tree
> behind the bridge.  When we're finished, we'll probably narrow it by
> updating the subordinate again, so the unused bus number space can be
> used for other bridges.  I don't know the exact details of the
> algorithm, and they're likely to change anyway, but pci_scan_bridge()
> is where most of it happens.
> 
> It looks like your current system doesn't support trees below the
> bridges, but hopefully we can make it so the generic enumeration
> algorithms still work.
The PCI-to-PCI bridge specification says that the Primary Bus Number
Register, Secondary Bus Number Register and Subordinate Bus Number
Register of the PCI configuration space of a PCI-to-PCI bridge should
all be set to 0 after reset.
Until now, I was forcing a specific value of the Secondary Bus Number
and Subordinate Bus Number (1 for my first bridge, 2 for my second
bridge, etc.).
Following your recommendation, I've changed this, and left those
values initialized to 0 by default, in order to let Linux set correct
values. Yes, Linux does assign appropriate values in the Secondary Bus
Number Register. But before that Linux also complains loudly that the
bridge configuration is invalid:
pci 0000:00:01.0: bridge configuration invalid ([bus 00-00]), reconfiguring
pci 0000:00:02.0: bridge configuration invalid ([bus 00-00]), reconfiguring
pci 0000:00:03.0: bridge configuration invalid ([bus 00-00]), reconfiguring
pci 0000:00:04.0: bridge configuration invalid ([bus 00-00]), reconfiguring
pci 0000:00:05.0: bridge configuration invalid ([bus 00-00]), reconfiguring
pci 0000:00:06.0: bridge configuration invalid ([bus 00-00]), reconfiguring
Looking at the code, we have:
        /* Check if setup is sensible at all */
        if (!pass &&
            (primary != bus->number || secondary <= bus->number ||
             secondary > subordinate)) {
                dev_info(&dev->dev, "bridge configuration invalid ([bus %02x-%02x]), reconfiguring\n",
                         secondary, subordinate);
                broken = 1;
        }
Due to the default values of the Primary Bus Number Register, Secondary
Bus Number Register and Subordinate Bus Number Register, we have:
 primary = 0
 secondary = 0
 subordinate = 0
We are enumerating the root bus, so bus->number = 0. Therefore:
 * The test primary != bus->number is false, so it's not the problem.
 * secondary <= bus->number is true, because secondary = 0, and
   bus->number = 0. It is the problem.
 * secondary > subordinate is false.
So I'm not sure what to do with this...
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 16:04                       ` Thomas Petazzoni
@ 2013-01-31 16:30                         ` Bjorn Helgaas
  2013-01-31 16:33                           ` Thomas Petazzoni
  2013-01-31 16:42                           ` Russell King - ARM Linux
  0 siblings, 2 replies; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-31 16:30 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 9:04 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Dear Bjorn Helgaas,
>
> On Wed, 30 Jan 2013 11:52:15 -0700, Bjorn Helgaas wrote:
>
>> The secondary/subordinate registers effectively define a bus number
>> aperture that tells the bridge which transactions to claim and forward
>> downstream.  When enumerating devices, we may update the subordinate
>> bus number to widen the aperture so we can enumerate an arbitrary tree
>> behind the bridge.  When we're finished, we'll probably narrow it by
>> updating the subordinate again, so the unused bus number space can be
>> used for other bridges.  I don't know the exact details of the
>> algorithm, and they're likely to change anyway, but pci_scan_bridge()
>> is where most of it happens.
>>
>> It looks like your current system doesn't support trees below the
>> bridges, but hopefully we can make it so the generic enumeration
>> algorithms still work.
>
> The PCI-to-PCI bridge specification says that the Primary Bus Number
> Register, Secondary Bus Number Register and Subordinate Bus Number
> Register of the PCI configuration space of a PCI-to-PCI bridge should
> all be set to 0 after reset.
>
> Until now, I was forcing a specific value of the Secondary Bus Number
> and Subordinate Bus Number (1 for my first bridge, 2 for my second
> bridge, etc.).
>
> Following your recommendation, I've changed this, and left those
> values initialized to 0 by default, in order to let Linux set correct
> values. Yes, Linux does assign appropriate values in the Secondary Bus
> Number Register. But before that Linux also complains loudly that the
> bridge configuration is invalid:
>
> pci 0000:00:01.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> pci 0000:00:02.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> pci 0000:00:03.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> pci 0000:00:04.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> pci 0000:00:05.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> pci 0000:00:06.0: bridge configuration invalid ([bus 00-00]), reconfiguring
Linux makes the unwarranted assumption that the PCI hierarchy has
already been configured by firmware.  If the only problem is the
messages above, I think we could just rework the message so it doesn't
look like an error.  I would guess that we probably also see the same
distressing message when we hot-add a card with a bridge on it,
because firmware won't have initialized the bridge.
My rule of thumb is that I like to note something in dmesg about the
initial configuration of bus/mem/io apertures and BARs, as well as
indications when we update them.  That way, the dmesg log should
contain enough information to debug most enumeration and configuration
defects.  pci_scan_bridge() is somewhat lacking in this regard.
Bjorn
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 16:30                         ` Bjorn Helgaas
@ 2013-01-31 16:33                           ` Thomas Petazzoni
  2013-01-31 17:03                             ` Bjorn Helgaas
  2013-01-31 16:42                           ` Russell King - ARM Linux
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-31 16:33 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Bjorn Helgaas,
On Thu, 31 Jan 2013 09:30:07 -0700, Bjorn Helgaas wrote:
> > Following your recommendation, I've changed this, and left those
> > values initialized to 0 by default, in order to let Linux set correct
> > values. Yes, Linux does assign appropriate values in the Secondary Bus
> > Number Register. But before that Linux also complains loudly that the
> > bridge configuration is invalid:
> >
> > pci 0000:00:01.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> > pci 0000:00:02.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> > pci 0000:00:03.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> > pci 0000:00:04.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> > pci 0000:00:05.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> > pci 0000:00:06.0: bridge configuration invalid ([bus 00-00]), reconfiguring
> 
> Linux makes the unwarranted assumption that the PCI hierarchy has
> already been configured by firmware.  If the only problem is the
> messages above, I think we could just rework the message so it doesn't
> look like an error.  I would guess that we probably also see the same
> distressing message when we hot-add a card with a bridge on it,
> because firmware won't have initialized the bridge.
> 
> My rule of thumb is that I like to note something in dmesg about the
> initial configuration of bus/mem/io apertures and BARs, as well as
> indications when we update them.  That way, the dmesg log should
> contain enough information to debug most enumeration and configuration
> defects.  pci_scan_bridge() is somewhat lacking in this regard.
Ok. Would something like:
 "bridge configuration with unassigned bus numbers ([bus 00-00]), reconfiguring"
be an acceptable replacement for this one?
Thanks for your quick feedback,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 16:33                           ` Thomas Petazzoni
@ 2013-01-31 17:03                             ` Bjorn Helgaas
  0 siblings, 0 replies; 216+ messages in thread
From: Bjorn Helgaas @ 2013-01-31 17:03 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 9:33 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Dear Bjorn Helgaas,
>
> On Thu, 31 Jan 2013 09:30:07 -0700, Bjorn Helgaas wrote:
>
>> > Following your recommendation, I've changed this, and left those
>> > values initialized to 0 by default, in order to let Linux set correct
>> > values. Yes, Linux does assign appropriate values in the Secondary Bus
>> > Number Register. But before that Linux also complains loudly that the
>> > bridge configuration is invalid:
>> >
>> > pci 0000:00:01.0: bridge configuration invalid ([bus 00-00]), reconfiguring
>> > pci 0000:00:02.0: bridge configuration invalid ([bus 00-00]), reconfiguring
>> > pci 0000:00:03.0: bridge configuration invalid ([bus 00-00]), reconfiguring
>> > pci 0000:00:04.0: bridge configuration invalid ([bus 00-00]), reconfiguring
>> > pci 0000:00:05.0: bridge configuration invalid ([bus 00-00]), reconfiguring
>> > pci 0000:00:06.0: bridge configuration invalid ([bus 00-00]), reconfiguring
>>
>> Linux makes the unwarranted assumption that the PCI hierarchy has
>> already been configured by firmware.  If the only problem is the
>> messages above, I think we could just rework the message so it doesn't
>> look like an error.  I would guess that we probably also see the same
>> distressing message when we hot-add a card with a bridge on it,
>> because firmware won't have initialized the bridge.
>>
>> My rule of thumb is that I like to note something in dmesg about the
>> initial configuration of bus/mem/io apertures and BARs, as well as
>> indications when we update them.  That way, the dmesg log should
>> contain enough information to debug most enumeration and configuration
>> defects.  pci_scan_bridge() is somewhat lacking in this regard.
>
> Ok. Would something like:
>
>  "bridge configuration with unassigned bus numbers ([bus 00-00]), reconfiguring"
>
> be an acceptable replacement for this one?
Seems reasonable.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 16:30                         ` Bjorn Helgaas
  2013-01-31 16:33                           ` Thomas Petazzoni
@ 2013-01-31 16:42                           ` Russell King - ARM Linux
  1 sibling, 0 replies; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-31 16:42 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 09:30:07AM -0700, Bjorn Helgaas wrote:
> Linux makes the unwarranted assumption that the PCI hierarchy has
> already been configured by firmware.  If the only problem is the
> messages above, I think we could just rework the message so it doesn't
> look like an error.
That's not a safe assumption, especially on platforms where there's no
BIOS (like ARM platforms).  Thankfully, for the platforms I care about,
the boot loaders I wrote for them _do_ do a full bus setup, so I don't
see a problem. :)
However, I have historically had the kernel over many years (probably
around 14 or 15 now) reassign all resources so that things are how the
kernel wants them, and not how my half-hearted attempt at setting them
up did (which only does a limited job enough to get the system to a
state where we can load the kernel.)
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
 
 
 
 
 
 
 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-28 18:56 ` [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems Thomas Petazzoni
  2013-01-28 22:21   ` Stephen Warren
  2013-01-29  3:29   ` Bjorn Helgaas
@ 2013-01-29 13:22   ` Andrew Murray
  2013-01-29 13:45     ` Thomas Petazzoni
  2013-02-07 14:37     ` Thomas Petazzoni
  2013-01-30 11:32   ` Russell King - ARM Linux
  2013-02-01  0:34   ` Stephen Warren
  4 siblings, 2 replies; 216+ messages in thread
From: Andrew Murray @ 2013-01-29 13:22 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 06:56:28PM +0000, Thomas Petazzoni wrote:
> This driver implements the support for the PCIe interfaces on the
> Marvell Armada 370/XP ARM SoCs. In the future, it might be extended to
> cover earlier families of Marvell SoCs, such as Dove, Orion and
> Kirkwood.
> 
> The driver implements the hw_pci operations needed by the core ARM PCI
> code to setup PCI devices and get their corresponding IRQs, and the
> pci_ops operations that are used by the PCI core to read/write the
> configuration space of PCI devices.
> 
> Since the PCIe interfaces of Marvell SoCs are completely separate and
> not linked together in a bus, this driver sets up an emulated PCI host
> bridge, with one PCI-to-PCI bridge as child for each hardware PCIe
> interface.
> 
> In addition, this driver enumerates the different PCIe slots, and for
> those having a device plugged in, it sets up the necessary address
> decoding windows, using the new armada_370_xp_alloc_pcie_window()
> function from mach-mvebu/addr-map.c.
> 
> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
[snip]
> +static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
> +{
[snip]
> +
> +       /*
> +        * Build an laddr array that describes the PCI device in a DT
> +        * way
> +        */
> +       laddr[0] = cpu_to_be32(port->devfn << 8);
> +       laddr[1] = laddr[2] = 0;
> +       intspec = cpu_to_be32(pin);
> +
> +       ret = of_irq_map_raw(port->dn, &intspec, 1, laddr, &oirq);
> +       if (ret) {
> +               dev_err(&pcie->pdev->dev,
> +                       "%s: of_irq_map_raw() failed, %d\n",
> +                       __func__, ret);
> +               return ret;
> +       }
Are you able to replace the above code with a call to of_irq_map_pci? I'm not
sure which approach is better. The of_irq_map_pci function doesn't require the
pin argument and instead uses the DT and/or performs its own pin swizzling. I
guess this means that if there are PCIe devices in the DT tree that do
anything strange with pins then it would be reflected in the IRQ you get. I've
found that you will also need to provide an implementation of
pcibios_get_phb_of_node for this to work correctly (see my RFC bios32 patch).
> +
> +       return irq_create_of_mapping(oirq.controller, oirq.specifier,
> +                                    oirq.size);
> +}
> +static int mvebu_pcie_enable(struct mvebu_pcie *pcie)
> +{
> +       struct hw_pci hw;
[snip]
> +       pci_common_init(&hw);
> +
> +       return mvebu_pcie_window_config(pcie);
> +}
> +
> +static int __init mvebu_pcie_probe(struct platform_device *pdev)
> +{
[snip]
> +
> +       mvebu_pcie_enable(pcie);
> +
> +       return 0;
> +}
> +
> +static const struct of_device_id mvebu_pcie_of_match_table[] = {
> +       { .compatible = "marvell,armada-370-xp-pcie", },
> +       {},
> +};
> +MODULE_DEVICE_TABLE(of, mvebu_pcie_of_match_table);
> +
> +static struct platform_driver mvebu_pcie_driver = {
> +       .driver = {
> +               .owner = THIS_MODULE,
> +               .name = "mvebu-pcie",
> +               .of_match_table =
> +                  of_match_ptr(mvebu_pcie_of_match_table),
> +       },
> +};
> +
> +static int mvebu_pcie_init(void)
> +{
> +       return platform_driver_probe(&mvebu_pcie_driver,
> +                                    mvebu_pcie_probe);
> +}
If you have multiple 'mvebu-pcie' in your DT then you will end up
with multiple calls to
mvebu_pcie_probe/mvebu_pcie_enable/pci_common_init.
However pci_common_init/pcibios_init_hw assumes it will only ever be called
once, and will thus result in trying to create multiple busses with the same
bus number. (The first root bus it creates is always zero provided you haven't
implemented hw->scan).
I noticed this in Thierry's patch set and posted an RFC patch which overcomes
this issue (patchwork.kernel.org/patch/2001171) and others. Perhaps you would
want to include this in your patchset?
Andrew Murray
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 13:22   ` Andrew Murray
@ 2013-01-29 13:45     ` Thomas Petazzoni
  2013-01-29 14:05       ` Andrew Murray
  2013-02-07 14:37     ` Thomas Petazzoni
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 13:45 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Andrew Murray,
On Tue, 29 Jan 2013 13:22:04 +0000, Andrew Murray wrote:
> > +static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
> > +{
> 
> [snip]
> 
> > +
> > +       /*
> > +        * Build an laddr array that describes the PCI device in a DT
> > +        * way
> > +        */
> > +       laddr[0] = cpu_to_be32(port->devfn << 8);
> > +       laddr[1] = laddr[2] = 0;
> > +       intspec = cpu_to_be32(pin);
> > +
> > +       ret = of_irq_map_raw(port->dn, &intspec, 1, laddr, &oirq);
> > +       if (ret) {
> > +               dev_err(&pcie->pdev->dev,
> > +                       "%s: of_irq_map_raw() failed, %d\n",
> > +                       __func__, ret);
> > +               return ret;
> > +       }
> 
> Are you able to replace the above code with a call to of_irq_map_pci? I'm not
> sure which approach is better. The of_irq_map_pci function doesn't require the
> pin argument and instead uses the DT and/or performs its own pin swizzling. I
> guess this means that if there are PCIe devices in the DT tree that does any
> thing strange with pins then it would be reflected in the IRQ you get. I've
> found that you will also need to provide an implementation of
> pcibios_get_phb_of_node for this to work correctly (see my RFC bios32 patch).
I did try using the of_irq_map_pci() function, but unfortunately, it
didn't work. IIRC, it didn't work because none of the pci_dev in my PCI
tree had any 'struct device_node' associated to them, or at least not
the one that had the right pdev->bus->number and pdev->devfn.
But, I guess that your patch that implements pcibios_get_phb_of_node()
should fix this problem. I'll try this. Thanks!
> > +static int mvebu_pcie_init(void)
> > +{
> > +       return platform_driver_probe(&mvebu_pcie_driver,
> > +                                    mvebu_pcie_probe);
> > +}
> 
> If you have multiple 'mvebu-pcie' in your DT then you will end up
> with multiple calls to
> mvebu_pcie_probe/mvebu_pcie_enable/pci_common_init.
Right. In practice, there will only ever be a single DT node, since all
PCIe interfaces are sub-nodes of the PCI controller node. But I
understand the theoretical problem.
> However pci_common_init/pcibios_init_hw assumes it will only ever be called
> once, and will thus result in trying to create multiple busses with the same
> bus number. (The first root bus it creates is always zero provided you haven't
> implemented hw->scan).
> 
> I noticed this in Thierry's patch set and posted an RFC patch which overcomes
> this issue (patchwork.kernel.org/patch/2001171) and others. Perhaps you would
> want to include this in your patchset?
Sure, I'll give it a test, and report if it works for me.
Thanks a lot!
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 13:45     ` Thomas Petazzoni
@ 2013-01-29 14:05       ` Andrew Murray
  2013-01-29 14:20         ` Thierry Reding
  0 siblings, 1 reply; 216+ messages in thread
From: Andrew Murray @ 2013-01-29 14:05 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 01:45:22PM +0000, Thomas Petazzoni wrote:
> Dear Andrew Murray,
> 
> On Tue, 29 Jan 2013 13:22:04 +0000, Andrew Murray wrote:
> 
> > > +static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
> > > +{
> > 
> > [snip]
> > 
> > > +
> > > +       /*
> > > +        * Build an laddr array that describes the PCI device in a DT
> > > +        * way
> > > +        */
> > > +       laddr[0] = cpu_to_be32(port->devfn << 8);
> > > +       laddr[1] = laddr[2] = 0;
> > > +       intspec = cpu_to_be32(pin);
> > > +
> > > +       ret = of_irq_map_raw(port->dn, &intspec, 1, laddr, &oirq);
> > > +       if (ret) {
> > > +               dev_err(&pcie->pdev->dev,
> > > +                       "%s: of_irq_map_raw() failed, %d\n",
> > > +                       __func__, ret);
> > > +               return ret;
> > > +       }
> > 
> > Are you able to replace the above code with a call to of_irq_map_pci? I'm not
> > sure which approach is better. The of_irq_map_pci function doesn't require the
> > pin argument and instead uses the DT and/or performs its own pin swizzling. I
> > guess this means that if there are PCIe devices in the DT tree that does any
> > thing strange with pins then it would be reflected in the IRQ you get. I've
> > found that you will also need to provide an implementation of
> > pcibios_get_phb_of_node for this to work correctly (see my RFC bios32 patch).
> 
> I did try using the of_irq_map_pci() function, but unfortunately, it
> didn't work. IIRC, it didn't work because none of the pci_dev in my PCI
> tree had any 'struct device_node' associated to them, or at least not
> the one that had the right pdev->bus->number and pdev->devfn.
> 
> But, I guess that your patch that implements pcibios_get_phb_of_node()
> should fix this problem. I'll try this. Thanks!
My bios32 patch departs slightly from your v2 04/27 patch in that it updates
hw_pci to contain a device node rather than opaque private data and my
pcibios_get_phb_of_node implementation relies on this. If you wanted to stick
with the implementation you and Thierry share then you'd have to find another
way to get to the device node from the void **private_data.
 
> > > +static int mvebu_pcie_init(void)
> > > +{
> > > +       return platform_driver_probe(&mvebu_pcie_driver,
> > > +                                    mvebu_pcie_probe);
> > > +}
> > 
> > If you have multiple 'mvebu-pcie' in your DT then you will end up
> > with multiple calls to
> > mvebu_pcie_probe/mvebu_pcie_enable/pci_common_init.
> 
> Right. In practice, there will only ever be a single DT node, since all
> PCIe interfaces are sub-nodes of the PCI controller node. But I
> understand the theoretical problem.
> 
> > However pci_common_init/pcibios_init_hw assumes it will only ever be called
> > once, and will thus result in trying to create multiple busses with the same
> > bus number. (The first root bus it creates is always zero provided you haven't
> > implemented hw->scan).
> > 
> > I noticed this in Thierry's patch set and posted an RFC patch which overcomes
> > this issue (patchwork.kernel.org/patch/2001171) and others. Perhaps you would
> > want to include this in your patchset?
> 
> Sure, I'll give it a test, and report if it works for me.
> 
> Thanks a lot!
> 
> Thomas
> -- 
> Thomas Petazzoni, Free Electrons
> Kernel, drivers, real-time and embedded Linux
> development, consulting, training and support.
> http://free-electrons.com
> 
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 14:05       ` Andrew Murray
@ 2013-01-29 14:20         ` Thierry Reding
  2013-01-29 14:29           ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Thierry Reding @ 2013-01-29 14:20 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 02:05:22PM +0000, Andrew Murray wrote:
> On Tue, Jan 29, 2013 at 01:45:22PM +0000, Thomas Petazzoni wrote:
> > Dear Andrew Murray,
> > 
> > On Tue, 29 Jan 2013 13:22:04 +0000, Andrew Murray wrote:
> > 
> > > > +static int __init mvebu_pcie_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
> > > > +{
> > > 
> > > [snip]
> > > 
> > > > +
> > > > +       /*
> > > > +        * Build an laddr array that describes the PCI device in a DT
> > > > +        * way
> > > > +        */
> > > > +       laddr[0] = cpu_to_be32(port->devfn << 8);
> > > > +       laddr[1] = laddr[2] = 0;
> > > > +       intspec = cpu_to_be32(pin);
> > > > +
> > > > +       ret = of_irq_map_raw(port->dn, &intspec, 1, laddr, &oirq);
> > > > +       if (ret) {
> > > > +               dev_err(&pcie->pdev->dev,
> > > > +                       "%s: of_irq_map_raw() failed, %d\n",
> > > > +                       __func__, ret);
> > > > +               return ret;
> > > > +       }
> > > 
> > > Are you able to replace the above code with a call to of_irq_map_pci? I'm not
> > > sure which approach is better. The of_irq_map_pci function doesn't require the
> > > pin argument and instead uses the DT and/or performs its own pin swizzling. I
> > > guess this means that if there are PCIe devices in the DT tree that does any
> > > thing strange with pins then it would be reflected in the IRQ you get. I've
> > > found that you will also need to provide an implementation of
> > > pcibios_get_phb_of_node for this to work correctly (see my RFC bios32 patch).
> > 
> > I did try using the of_irq_map_pci() function, but unfortunately, it
> > didn't work. IIRC, it didn't work because none of the pci_dev in my PCI
> > tree had any 'struct device_node' associated to them, or at least not
> > the one that had the right pdev->bus->number and pdev->devfn.
> > 
> > But, I guess that your patch that implements pcibios_get_phb_of_node()
> > should fix this problem. I'll try this. Thanks!
> 
> My bios32 patch departs slightly from your v2 04/27 patch in that it updates
> hw_pci to contain a device node rather than opaque private data and my
> pcibios_get_phb_of_node implementation relies on this. If you wanted to stick
> with the implementation you and Thierry share then you'd have to find another
> way to get to the device node from the void **private_data.
If at all possible I think the right thing to do is reuse the generic
pcibios_get_phb_of_node() implementation. On Tegra this turned out to
require a minimal change to the DT bindings of the root port nodes to
make sure they provide the correct address in the reg property.
Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 836 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20130129/47c1c5ce/attachment.sig>
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 14:20         ` Thierry Reding
@ 2013-01-29 14:29           ` Thomas Petazzoni
  2013-01-29 15:02             ` Thierry Reding
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 14:29 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Thierry Reding,
On Tue, 29 Jan 2013 15:20:15 +0100, Thierry Reding wrote:
> If at all possible I think the right thing to do is reuse the generic
> pcibios_get_phb_of_node() implementation. On Tegra this turned out to
> require a minimal change to the DT bindings of the root port nodes to
> make sure they provide the correct address in the reg property.
Could you detail the change that was needed? The DT bindings I use for
the Marvell PCIe driver are very, very similar to the ones you use for
Tegra, since I basically inspired my entire DT binding on your work.
And still, I think of_irq_map_pci() wasn't working for me.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 14:29           ` Thomas Petazzoni
@ 2013-01-29 15:02             ` Thierry Reding
  2013-01-29 15:08               ` Andrew Murray
  2013-01-29 15:10               ` Thomas Petazzoni
  0 siblings, 2 replies; 216+ messages in thread
From: Thierry Reding @ 2013-01-29 15:02 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 03:29:37PM +0100, Thomas Petazzoni wrote:
> Dear Thierry Reding,
> 
> On Tue, 29 Jan 2013 15:20:15 +0100, Thierry Reding wrote:
> 
> > If at all possible I think the right thing to do is reuse the generic
> > pcibios_get_phb_of_node() implementation. On Tegra this turned out to
> > require a minimal change to the DT bindings of the root port nodes to
> > make sure they provide the correct address in the reg property.
> 
> Could you detail the change that was needed? The DT bindings I use for
> the Marvell PCIe driver are very, very similar to the ones you use for
> Tegra, since I basically inspired my entire DT binding on your work.
> And still, I think of_irq_map_pci() wasn't working for me.
Now that I think about it, there were a few more changes needed.
For one, the reg property of the root port nodes needs to be in the
format specified by the PCI DT binding. That is, 3 cells for the
address and 2 cells for the size.
So I end up with something like this:
	pcie-controller {
		...
		ranges = <0x00000800 0 0x80000000 0x80000000 0 0x00001000 /* port 0 registers */
			  0x00001000 0 0x80001000 0x80001000 0 0x00001000 /* port 1 registers */
			  ...>;
		pci@1,0 {
			reg = <0x000800 0 0x80000000 0 0x1000>;
			...
		};
		pci@2,0 {
			reg = <0x001000 0 0x80001000 0 0x1000>;
			...
		};
	};
So what happens here is that for each root port (pci@1,0 and pci@2,0),
the reg property is translated into the parent address space via the
pcie-controller's ranges property. pci@1,0 gets the memory region
0x80000000-0x80000fff and pci@2,0 gets 0x80001000-0x80001fff. (These are
actually windows through which the configuration space of the root ports
is accessed.)
At the same time this reg property maps both devices into the PCI
address space at addresses 0:01.0 and 0:02.0 respectively.
The second change is that you can't rely on ARM's default implementation
of the bus scan operation, which calls pci_scan_root_bus(), passing in a
NULL as the struct device which acts as the bus' parent. So on Tegra I
added a custom implementation which calls pci_create_root_bus(), passing
in the struct device of the PCI host bridge, whose .of_node field will
be set to the pcie-controller node above. Incidentally this also fixed
another issue where the PCI core and ARM's pci_common_init() both
eventually end up calling pci_bus_add_devices(). I don't remember the
exact symptoms but I think this was causing resource conflicts during
the second enumeration or so.
Because a proper struct device with the correct .of_node field is passed
into pci_create_root_bus(), the generic pcibios_get_phb_of_node() will
know how to find it by looking at bus->bridge->parent->of_node. After
that the generic matching code will search the bridge (pcie-controller)
node's children and relate them to the struct pci_dev by devfn. This is
done in pci_set_of_node() defined in drivers/pci/of.c, which calls
of_pci_find_child_device() from drivers/of/of_pci.c.
This is quite convoluted, but I hope it helps.
Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 836 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20130129/1a0b14a8/attachment.sig>
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 15:02             ` Thierry Reding
@ 2013-01-29 15:08               ` Andrew Murray
  2013-01-29 15:10               ` Thomas Petazzoni
  1 sibling, 0 replies; 216+ messages in thread
From: Andrew Murray @ 2013-01-29 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Jan 29, 2013 at 03:02:01PM +0000, Thierry Reding wrote:
> On Tue, Jan 29, 2013 at 03:29:37PM +0100, Thomas Petazzoni wrote:
> > Dear Thierry Reding,
> > 
> > On Tue, 29 Jan 2013 15:20:15 +0100, Thierry Reding wrote:
> > 
> > > If at all possible I think the right thing to do is reuse the generic
> > > pcibios_get_phb_of_node() implementation. On Tegra this turned out to
> > > require a minimal change to the DT bindings of the root port nodes to
> > > make sure they provide the correct address in the reg property.
> > 
> > Could you detail the change that was needed? The DT bindings I use for
> > the Marvell PCIe driver are very, very similar to the ones you use for
> > Tegra, since I basically inspired my entire DT binding on your work.
> > And still, I think of_irq_map_pci() wasn't working for me.
> 
> Now that I think about it, there were a few more changes needed.
> For one, the reg property of the root port nodes need to be in the
> format specified by the PCI DT binding. That is, 3 cells for the
> address and 2 cells for the size.
> 
> So I end up with something like this:
> 
> 	pcie-controller {
> 		...
> 
> 		ranges = <0x00000800 0 0x80000000 0x80000000 0 0x00001000 /* port 0 registers */
> 			  0x00001000 0 0x80001000 0x80001000 0 0x00001000 /* port 1 registers */
> 			  ...>;
> 
> 		pci@1,0 {
> 			reg = <0x000800 0 0x80000000 0 0x1000>;
> 			...
> 		};
> 
> 		pci@2,0 {
> 			reg = <0x001000 0 0x80001000 0 0x1000>;
> 			...
> 		};
> 	};
> 
> So what happens here is that for each root port (pci@1,0 and pci@2,0),
> the reg property is translated into the parent address space via the
> pcie-controller's ranges property. pci@1,0 gets the memory region
> 0x80000000-0x80000fff and pci@2,0 gets 0x80001000-0x80001fff. (These are
> actually windows through which the configuration space of the root ports
> is accessed.)
> 
> At the same time this reg property maps both devices into the PCI
> address space at addresses 0:01.0 and 0:02.0 respectively.
> 
> The second change is that you can't rely on ARM's default implementation
> of the bus scan operation, which calls pci_scan_root_bus(), passing in a
> NULL as the struct device which acts as the bus' parent. So on Tegra I
> added a custom implementation which calls pci_create_root_bus(), passing
> in the struct device of the PCI host bridge, whose .of_node field will
> be set to the pcie-controller node above. Incidentally this also fixed
> another issue where the PCI core and ARM's pci_common_init() both
> eventually end up calling pci_bus_add_devices(). I don't remember the
> exact symptoms but I think this was causing resource conflicts during
> the second enumeration or so.
> 
> Because a proper struct device with the correct .of_node field is passed
> into pci_create_root_bus(), the generic pcibios_get_phb_of_node() will
> know how to find it by looking at bus->bridge->parent->of_node. After
> that the generic matching code will search the bridge (pcie-controller)
> node's children and relate them to the struct pci_dev by devfn. This is
> done in pci_set_of_node() defined in drivers/pci/of.c, which calls
> of_pci_find_child_device() from drivers/of/of_pci.c.
> 
> This is quite convoluted, but I hope it helps.
Thanks this is very helpful. I will see if this lets me avoid implementing
pcibios_get_phb_of_node.
Thanks,
Andrew Murray
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 15:02             ` Thierry Reding
  2013-01-29 15:08               ` Andrew Murray
@ 2013-01-29 15:10               ` Thomas Petazzoni
  1 sibling, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-29 15:10 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Thierry Reding,
On Tue, 29 Jan 2013 16:02:01 +0100, Thierry Reding wrote:
> Now that I think about it, there were a few more changes needed.
> For one, the reg property of the root port nodes need to be in the
> format specified by the PCI DT binding. That is, 3 cells for the
> address and 2 cells for the size.
> 
> So I end up with something like this:
> 
> 	pcie-controller {
> 		...
> 
> 		ranges = <0x00000800 0 0x80000000 0x80000000 0 0x00001000 /* port 0 registers */
> 			  0x00001000 0 0x80001000 0x80001000 0 0x00001000 /* port 1 registers */
> 			  ...>;
> 
> 		pci@1,0 {
> 			reg = <0x000800 0 0x80000000 0 0x1000>;
> 			...
> 		};
> 
> 		pci@2,0 {
> 			reg = <0x001000 0 0x80001000 0 0x1000>;
> 			...
> 		};
> 	};
> 
> So what happens here is that for each root port (pci@1,0 and pci@2,0),
> the reg property is translated into the parent address space via the
> pcie-controller's ranges property. pci@1,0 gets the memory region
> 0x80000000-0x80000fff and pci@2,0 gets 0x80001000-0x80001fff. (These are
> actually windows through which the configuration space of the root ports
> is accessed.)
> 
> At the same time this reg property maps both devices into the PCI
> address space at addresses 0:01.0 and 0:02.0 respectively.
This part I think I've done exactly the same thing in the Marvell PCIe
DT binding.
> The second change is that you can't rely on ARM's default implementation
> of the bus scan operation, which calls pci_scan_root_bus(), passing in a
> NULL as the struct device which acts as the bus' parent. So on Tegra I
> added a custom implementation which calls pci_create_root_bus(), passing
> in the struct device of the PCI host bridge, whose .of_node field will
> be set to the pcie-controller node above. Incidentally this also fixed
> another issue where the PCI core and ARM's pci_common_init() both
> eventually end up calling pci_bus_add_devices(). I don't remember the
> exact symptoms but I think this was causing resource conflicts during
> the second enumeration or so.
> 
> Because a proper struct device with the correct .of_node field is passed
> into pci_create_root_bus(), the generic pcibios_get_phb_of_node() will
> know how to find it by looking at bus->bridge->parent->of_node. After
> that the generic matching code will search the bridge (pcie-controller)
> node's children and relate them to the struct pci_dev by devfn. This is
> done in pci_set_of_node() defined in drivers/pci/of.c, which calls
> of_pci_find_child_device() from drivers/of/of_pci.c.
This is quite certainly the part that I was missing. I'll try this and
let you know.
Thanks a lot for the lengthy, but very useful explanation!
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
 
 
 
 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-29 13:22   ` Andrew Murray
  2013-01-29 13:45     ` Thomas Petazzoni
@ 2013-02-07 14:37     ` Thomas Petazzoni
  2013-02-07 15:51       ` Andrew Murray
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-07 14:37 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Andrew Murray,
On Tue, 29 Jan 2013 13:22:04 +0000, Andrew Murray wrote:
> > +       /*
> > +        * Build an laddr array that describes the PCI device in a
> > DT
> > +        * way
> > +        */
> > +       laddr[0] = cpu_to_be32(port->devfn << 8);
> > +       laddr[1] = laddr[2] = 0;
> > +       intspec = cpu_to_be32(pin);
> > +
> > +       ret = of_irq_map_raw(port->dn, &intspec, 1, laddr, &oirq);
> > +       if (ret) {
> > +               dev_err(&pcie->pdev->dev,
> > +                       "%s: of_irq_map_raw() failed, %d\n",
> > +                       __func__, ret);
> > +               return ret;
> > +       }
> 
> Are you able to replace the above code with a call to of_irq_map_pci?
> I'm not sure which approach is better. The of_irq_map_pci function
> doesn't require the pin argument and instead uses the DT and/or
> performs its own pin swizzling. I guess this means that if there are
> PCIe devices in the DT tree that does any thing strange with pins
> then it would be reflected in the IRQ you get. I've found that you
> will also need to provide an implementation of
> pcibios_get_phb_of_node for this to work correctly (see my RFC bios32
> patch).
I tried to do so, but it doesn't work properly. Let me explain what I
did and the behavior that I observe.
First of all, I didn't reimplement the pcibios_get_phb_of_node(), but
instead, as Thierry Reding suggested, simply implemented the
hw_pci.scan() function as follows:
static struct pci_bus *mvebu_pcie_scan_bus(int nr, struct pci_sys_data *sys)
{
	struct mvebu_pcie *pcie = sys_to_pcie(sys);
	return pci_scan_root_bus(&pcie->pdev->dev, sys->busnr,
				 &mvebu_pcie_ops, sys, &sys->resources);
}
This allows passing the "struct device *" pointer, which ultimately
allows the PCI devices to carry a pointer to the corresponding DT node.
The DT representing my PCIe controller and its interfaces is the
following:
		pcie-controller {
			compatible = "marvell,armada-370-xp-pcie";
			status = "disabled";
			#address-cells = <3>;
			#size-cells = <2>;
			bus-range = <0x00 0xff>;
			ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
				  0x00001000 0 0xd0080000 0xd0080000 0 0x00002000   /* port 1.0 registers */
				  0x81000000 0 0	  0xc0000000 0 0x00010000   /* downstream I/O */
				  0x82000000 0 0	  0xc1000000 0 0x08000000>; /* non-prefetchable memory */
			#interrupt-cells = <1>;
			interrupt-map-mask = <0xf800 0 0 1>;
			interrupt-map = <0x0800 0 0 1 &mpic 58 /* port 0.0 */
					 0x1000 0 0 1 &mpic 62>; /* port 1.0 */
			pcie at 0,0 {
				device_type = "pciex";
				reg = <0x0800 0 0xd0040000 0 0x2000>;
				#address-cells = <3>;
				#size-cells = <2>;
				marvell,pcie-port = <0>;
				marvell,pcie-lane = <0>;
				clocks = <&gateclk 5>;
				status = "disabled";
			};
			pcie at 1,0 {
				device_type = "pciex";
				reg = <0x1000 0 0xd0080000 0 0x2000>;
				#address-cells = <3>;
				#size-cells = <2>;
				marvell,pcie-port = <1>;
				marvell,pcie-lane = <0>;
				clocks = <&gateclk 9>;
				status = "disabled";
			};
		};
So we have two PCIe interfaces. lspci shows the following output:
00:01.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
00:02.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
01:00.0 Network controller: Intel Corporation Ultimate N WiFi Link 5300
02:00.0 USB controller: ASMedia Technology Inc. Device 1040
So:
 * On bus 0, we have two PCI-to-PCI bridges. Those are emulated in
   software and allow the Marvell PCIe driver to get dynamic assignment
   of address decoding windows. The entries in the interrupt-map DT
   property match the bus number and slot number of those PCI-to-PCI
   bridges.
 * On bus 1, we have the real PCIe device connected to the first PCIe
   interface. This bus 1 is made "visible" thanks to the 00:01.0
   PCI-to-PCI bridge.
 * On bus 2, we have the real PCIe device connected to the second PCIe
   interface. This bus 2 is made "visible" thanks to the 00:02.0
   PCI-to-PCI bridge.
Now, when I call the of_irq_map_pci() function, the problem is that the
"struct pci_dev" that it receives is the one corresponding to the
particular PCIe device we're interested in. And this "struct pci_dev"
has a non-NULL pointer to the "struct device_node" representing
"pcie0,0" or "pcie1,0" above. Since the "struct device_node" is
non-NULL, of_irq_map_pci() builds a laddr[] with the bus number and
devfn of this device: bus number is 1, devfn is 0. And this doesn't
match with the interrupt-map that is in my DT, which associates the
interrupts numbers with the PCI-to-PCI bridges rather than the devices
themselves.
To me, the "struct pci_dev" representing the real PCIe devices should
have a NULL "struct device_node" pointer, because those devices are not
represented in the DT. If this was the case, then the of_irq_map_pci()
would go one level up in the PCI hierarchy, find the "struct pci_dev"
that corresponds to the PCI-to-PCI bridge, which generates an laddr[]
having a bus number and a devfn value matching the interrupt-map property.
If I do the following hack in of_irq_map_pci(), then everything works
nicely:
                } else {
                        /* We found a P2P bridge, check if it has a node */
                        ppnode = pci_device_to_OF_node(ppdev);
+                       if (pdev->bus->number != 0)
+                               ppnode = NULL;
                }
What this hack does is that if we are not on bus 0, it means that the
pci_dev is a real PCIe device, and therefore we force the code to
assume it does not have a DT reference.
Isn't there a problem here in the PCI/DT code ? Is it normal that a
PCIe device that isn't described in the DT carries a non-NULL struct
device_node pointer?
If you need some more details about the problem, do not hesitate to ask.
Thanks a lot for your help,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 14:37     ` Thomas Petazzoni
@ 2013-02-07 15:51       ` Andrew Murray
  2013-02-07 16:19         ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Andrew Murray @ 2013-02-07 15:51 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 07, 2013 at 02:37:50PM +0000, Thomas Petazzoni wrote:
> Dear Andrew Murray,
> 
> On Tue, 29 Jan 2013 13:22:04 +0000, Andrew Murray wrote:
> 
> > > +       /*
> > > +        * Build an laddr array that describes the PCI device in a
> > > DT
> > > +        * way
> > > +        */
> > > +       laddr[0] = cpu_to_be32(port->devfn << 8);
> > > +       laddr[1] = laddr[2] = 0;
> > > +       intspec = cpu_to_be32(pin);
> > > +
> > > +       ret = of_irq_map_raw(port->dn, &intspec, 1, laddr, &oirq);
> > > +       if (ret) {
> > > +               dev_err(&pcie->pdev->dev,
> > > +                       "%s: of_irq_map_raw() failed, %d\n",
> > > +                       __func__, ret);
> > > +               return ret;
> > > +       }
> > 
> > Are you able to replace the above code with a call to of_irq_map_pci?
> > I'm not sure which approach is better. The of_irq_map_pci function
> > doesn't require the pin argument and instead uses the DT and/or
> > performs its own pin swizzling. I guess this means that if there are
> > PCIe devices in the DT tree that does any thing strange with pins
> > then it would be reflected in the IRQ you get. I've found that you
> > will also need to provide an implementation of
> > pcibios_get_phb_of_node for this to work correctly (see my RFC bios32
> > patch).
> 
> I tried to do so, but it doesn't work properly. Let me explain what I
> did and the behavior that I observe.
> 
> First of all, I didn't reimplement the pcibios_get_phb_of_node(), but
> instead, as Thierry Reding suggested, simply implemented the
> hw_pci.scan() function as follows:
I've not had any time to test Thierry's solution to avoid implementing
pcibios_get_phb_of_node - but it did seem to work for him and seem correct
at the time.
> 
> static struct pci_bus *mvebu_pcie_scan_bus(int nr, struct pci_sys_data *sys)
> {
> 	struct mvebu_pcie *pcie = sys_to_pcie(sys);
> 	return pci_scan_root_bus(&pcie->pdev->dev, sys->busnr,
> 				 &mvebu_pcie_ops, sys, &sys->resources);
> }
> 
> This allows to pass the "struct device *" pointer, which ultimately
> allows the PCI devices to carry a pointer to the corresponding DT node.
> 
> The DT representing my PCIe controller and its interfaces is the
> following:
> 
> 		pcie-controller {
> 			compatible = "marvell,armada-370-xp-pcie";
> 			status = "disabled";
> 
> 			#address-cells = <3>;
> 			#size-cells = <2>;
> 
> 			bus-range = <0x00 0xff>;
> 
> 			ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
> 				  0x00001000 0 0xd0080000 0xd0080000 0 0x00002000   /* port 1.0 registers */
> 				  0x81000000 0 0	  0xc0000000 0 0x00010000   /* downstream I/O */
> 				  0x82000000 0 0	  0xc1000000 0 0x08000000>; /* non-prefetchable memory */
> 
> 			#interrupt-cells = <1>;
> 			interrupt-map-mask = <0xf800 0 0 1>;
> 			interrupt-map = <0x0800 0 0 1 &mpic 58 /* port 0.0 */
> 					 0x1000 0 0 1 &mpic 62>; /* port 1.0 */
> 
> 			pcie at 0,0 {
> 				device_type = "pciex";
> 				reg = <0x0800 0 0xd0040000 0 0x2000>;
> 				#address-cells = <3>;
> 				#size-cells = <2>;
> 				marvell,pcie-port = <0>;
> 				marvell,pcie-lane = <0>;
> 				clocks = <&gateclk 5>;
> 				status = "disabled";
> 			};
> 
> 			pcie at 1,0 {
> 				device_type = "pciex";
> 				reg = <0x1000 0 0xd0080000 0 0x2000>;
> 				#address-cells = <3>;
> 				#size-cells = <2>;
> 				marvell,pcie-port = <1>;
> 				marvell,pcie-lane = <0>;
> 				clocks = <&gateclk 9>;
> 				status = "disabled";
> 			};
> 		};
> 
> So we have two PCIe interfaces. lspci shows the following output:
> 
> 00:01.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> 00:02.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> 01:00.0 Network controller: Intel Corporation Ultimate N WiFi Link 5300
> 02:00.0 USB controller: ASMedia Technology Inc. Device 1040
> 
> So:
> 
>  * On bus 0, we have two PCI-to-PCI bridges. Those are emulated in
>    software and allow the Marvell PCIe driver to get dynamic assignment
>    of address decoding windows. The entries in the interrupt-map DT
>    property match the bus number and slot number of those PCI-to-PCI
>    bridges.
> 
>  * On bus 1, we have the real PCIe device connected to the first PCIe
>    interface. This bus 1 is made "visible" thanks to the 00:01.0
>    PCI-to-PCI bridge.
> 
>  * On bus 2, we have the real PCIe device connected to the second PCIe
>    interface. This bus 2 is made "visible" thanks to the 00:02.0
>    PCI-to-PCI bridge.
> 
> Now, when I call the of_irq_map_pci() function, the problem is that the
> "struct pci_dev" that it receives is the one corresponding to the
> particular PCIe device we're interested in. And this "struct pci_dev"
> has a non-NULL pointer to the "struct device_node" representing
> "pcie0,0" or "pci0,1" above. Since the "struct device_node" is
> non-NULL, of_irq_map_pci() builds a laddr[] with the bus number and
> devfn of this device: bus number is 1, devfn is 0. And this doesn't
> match with the interrupt-map that is in my DT, which associates the
> interrupts numbers with the PCI-to-PCI bridges rather than the devices
> themselves.
> 
> To me, the "struct pci_dev" representing the real PCIe devices should
> have a NULL "struct device_node" pointer, because those device are not
> represented in the DT. If this was the case, then the of_irq_map_pci()
> would go one level up in the PCI hierarchy, find the "struct pci_dev"
> that corresponds to the PCI-to-PCI bridge, which generates an laddr[]
> having a bus number a devfn value matching the interrupt-map property.
Yes this is my current understanding.
> 
> If I do the following hack in of_irq_map_pci(), then everything works
> nicely:
> 
>                 } else {
>                         /* We found a P2P bridge, check if it has a node */
>                         ppnode = pci_device_to_OF_node(ppdev);
> +                       if (pdev->bus->number != 0)
> +                               ppnode = NULL;
>                 }
> 
> 
> What this hack does is that if we are not on bus 0, it means that the
> pci_dev is a real PCIe device, and therefore we force the code to
> asssume it does not have a DT reference.
> 
> Isn't there a problem here in the PCI/DT code ? Is it normal that a
> PCIe device that isn't described in the DT carries a non-NULL struct
> device_node pointer?
I would suggest the issue isn't in the PCI/DT code. This is what I see
with my implementation (which uses an implementation of
pcibios_get_phb_of_node) - I'd like to believe this is correct behaviour:
- of_irq_map_pci is called for an endpoint (5:0:0), its pdev->dev.of_node
  is NULL. As I don't have a representation of this endpoint in my DT the
  of_irq_map_pci code proceeds to walk the fabric.
- Starting with the pdev for 5:0:0, of_irq_map_pci sees it has a parent
  (downstream switch bridge), in this case pdev(5:0:0)->bus->self->dev.of_node
  is NULL, due to this swizzling occurs and we walk the parent's bus (bus 2)
- This continues to bus 1 and then bus 0 where of_irq_map_pci realises that
  it has no parent (it's the host bridge). Due to the implementation of
  pcibios_get_phb_of_node an of_node is produced, this is then used to construct
  a lspec (for bus number 0).
The of_irq_map_pci code stops walking as soon as it finds a function in the
tree that has a device node. This suggests that if you represent a bridge
you must also include an interrupt-map. The problem here is that you have
represented a bridge but not included a map.
I can think of three solutions:
1. Something like this:
                 } else {
                         /* We found a P2P bridge, check if it has a node */
                         ppnode = pci_device_to_OF_node(ppdev);
 +                       if (ppnode doesnt have an interrupt-map)//if (pdev->bus->number != 0)
 +                               ppnode = NULL;
                 }
2. Remove the bridges from the DT? Or remove the map from pcie-controller and
   add a map each to pcie at 0,1 and pcie at 1,1?
3. Change the mask of your map so that it doesn't care about bus numbers. I
   have a map that looks like this:
                interrupt-map-mask = <0 0 0 7>;
                interrupt-map = <0 0 0 1 ... //anything coming in on INTA
                                 0 0 0 2 ... //anything coming in on INTB
                                 0 0 0 3 ... ...
                                 0 0 0 4 ... ...
Andrew Murray
> 
> If you need some more details about the problem, do not hesitate to ask.
> 
> Thanks a lot for your help,
> 
> Thomas
> -- 
> Thomas Petazzoni, Free Electrons
> Kernel, drivers, real-time and embedded Linux
> development, consulting, training and support.
> http://free-electrons.com
> 
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 15:51       ` Andrew Murray
@ 2013-02-07 16:19         ` Thomas Petazzoni
  2013-02-07 16:40           ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-07 16:19 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Andrew Murray,
On Thu, 7 Feb 2013 15:51:17 +0000, Andrew Murray wrote:
> > First of all, I didn't reimplement the pcibios_get_phb_of_node(), but
> > instead, as Thierry Reding suggested, simply implemented the
> > hw_pci.scan() function as follows:
> 
> I've not had any time to test Thierry's solution to avoid implementing
> pcibios_get_phb_of_node - but it did seem to work for him and seem correct
> at the time.
At least there are device tree nodes associated to PCI devices, so it
looks good from this perspective.
> > To me, the "struct pci_dev" representing the real PCIe devices should
> > have a NULL "struct device_node" pointer, because those device are not
> > represented in the DT. If this was the case, then the of_irq_map_pci()
> > would go one level up in the PCI hierarchy, find the "struct pci_dev"
> > that corresponds to the PCI-to-PCI bridge, which generates an laddr[]
> > having a bus number a devfn value matching the interrupt-map property.
> 
> Yes this is my current understanding.
Ok. But that's not what happens: the "struct pci_dev" representing the
real PCIe device *DOES* have a non-NULL struct device_node pointer. And
this is what makes the entire thing fail.
> I would suggest the issue isn't in the PCI/DT code. This is what I see
I believe it is, because as I said above, the struct pci_dev associated
to a real PCIe device should not have a struct device_node pointer,
because this PCIe device has been dynamically enumerated and is
therefore not part of the device tree.
> with my implementation (which uses an implementation of
> pcibios_get_phb_of_node) - I'd like to believe this is correct behaviour:
> 
> - of_irq_map_pci is called for an endpoint (5:0:0), its pdev->dev.of_node
>   is NULL. As I don't have a representation of this endpoint in my DT the
>   of_irq_map_pci code proceeds to walk the fabric.
I believe this should be the behavior. But this is not what happens: the
pdev->dev.of_node of an endpoint pci_dev is not NULL.
> - Starting with the pdev for 5:0:0, of_irq_map_pci sees it has a parent
>   (downstream switch bridge), in this case pdev(5:0:0)->bus->self->dev.of_node
>   is NULL, due to this swizzling occurs and we walk the parent's bus (bus 2)
> - This continues to bus 1 and then bus 0 where of_irq_map_pci realises that
>   it has no parent (its the host bridge). Due to the implementation of
>   pcibios_get_phb_of_node a of_node is produced, this is then used to construct
>   a lspec (for bus number 0).
> 
> The of_irq_map_pci code stops walking as soon as it finds a function in the
> tree that has a device node. This suggests that if you represent a bridge
> you must also include an interrupt-map. The problem here is that you have
> represented a bridge but not included a map.
I understood that it walks up the PCI hierarchy, and that's fine. As
I've shown in my previous e-mail, the only problem is that this
pdev->dev.of_node should be NULL for the PCIe endpoint device. If it
were NULL, then everything would work correctly, as I confirmed
by the hack I did in of_irq_map_pci().
> I can think of three solutions:
> 
> 1. Something like this:
> 
>                  } else {
>                          /* We found a P2P bridge, check if it has a node */
>                          ppnode = pci_device_to_OF_node(ppdev);
>  +                       if (ppnode doesnt have an interrupt-map)//if (pdev->bus->number != 0)
>  +                               ppnode = NULL;
>                  }
> 
> 2. Remove the bridges from the DT? Or remove the map from pcie-controller and
>    add a map each to pcie at 0,1 and pcie at 1,1?
> 
> 3. Change the mask of your map so that it doesn't care about bus numbers. I
>    have a map that looks like this:
> 
>                 interrupt-map-mask = <0 0 0 7>;
>                 interrupt-map = <0 0 0 1 ... //anything coming in on INTA
>                                  0 0 0 2 ... //anything coming in on INTB
>                                  0 0 0 3 ... ...
>                                  0 0 0 4 ... ...
Unfortunately, I don't quite agree with any of your three solutions. I
still do believe the root problem is that pdev->dev.of_node should be
NULL for the PCIe endpoints, since those devices are not probed with
the Device Tree.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 16:19         ` Thomas Petazzoni
@ 2013-02-07 16:40           ` Thomas Petazzoni
  2013-02-07 16:53             ` Andrew Murray
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-07 16:40 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Thomas Petazzoni,
On Thu, 7 Feb 2013 17:19:04 +0100, Thomas Petazzoni wrote:
> Unfortunately, I don't quite agree with any of your three solutions. I
> still do believe the root problem is that pdev->dev.of_node should be
> NULL for the PCIe endpoints, since those devices are not probed with
> the Device Tree.
Looking more at this, the pdev->dev.of_node is in fact NULL, but the
code uses the of_node of the parent PCI device.
So in fact the problem is indeed that the subnodes pcie0,0 and pcie1,0
are seen as corresponding to the PCI-to-PCI bridges.
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 16:40           ` Thomas Petazzoni
@ 2013-02-07 16:53             ` Andrew Murray
  2013-02-07 17:14               ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Andrew Murray @ 2013-02-07 16:53 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 07, 2013 at 04:40:40PM +0000, Thomas Petazzoni wrote:
> Dear Thomas Petazzoni,
> 
> On Thu, 7 Feb 2013 17:19:04 +0100, Thomas Petazzoni wrote:
> 
> > Unfortunately, I don't quite agree with any of your three solutions. I
> > still do believe the root problem is that pdev->dev.of_node should be
> > NULL for the PCIe endpoints, since those devices are not probed with
> > the Device Tree.
> 
> Looking more at this, the pdev->dev.of_node is in fact NULL, but the
> code uses the of_node of the parent PCI device.
> 
> So in fact the problem is indeed that the subnodes pcie0,0 and pcie1,0
> are seen as corresponding to the PCI-to-PCI bridges.
I would suggest changing the interrupt-map-mask to match any bus number. (Don't
forget that the secondary bus number of each of your emulated bridges will
vary depending on how many devices are detected underneath each root port,
assuming you don't try and partition bus numbers or use domains between ports).
Andrew Murray
> 
> Thanks,
> 
> Thomas
> -- 
> Thomas Petazzoni, Free Electrons
> Kernel, drivers, real-time and embedded Linux
> development, consulting, training and support.
> http://free-electrons.com
> 
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 16:53             ` Andrew Murray
@ 2013-02-07 17:14               ` Thomas Petazzoni
  2013-02-07 17:29                 ` Andrew Murray
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-07 17:14 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Andrew Murray,
On Thu, 7 Feb 2013 16:53:47 +0000, Andrew Murray wrote:
> > So in fact the problem is indeed that the subnodes pcie0,0 and pcie1,0
> > are seen as corresponding to the PCI-to-PCI bridges.
> 
> I would suggest changing the interrupt-mask to match any bus number. (Don't
> forget that the secondary bus number of each of your emulated bridges will
> vary depending on how many devices are detected underneath each root port,
> assuming you don't try and partition bus numbers or use domains between ports).
I don't think this would work. Currently, the interrupt-map associates
the interrupts with the PCI-to-PCI bridges, i.e devices 00:01, 00:02,
00:03, 00:04, 00:05, etc.
The real PCIe devices themselves are at 01:00, 02:00, 03:00, 04:00,
05:00. Each of them sits on a different bus, at devfn = 0.
So if I ignore the bus number, how could the PCI code find what is the
matching interrupt?
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 17:14               ` Thomas Petazzoni
@ 2013-02-07 17:29                 ` Andrew Murray
  2013-02-07 17:37                   ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Andrew Murray @ 2013-02-07 17:29 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 07, 2013 at 05:14:18PM +0000, Thomas Petazzoni wrote:
> Dear Andrew Murray,
> 
> On Thu, 7 Feb 2013 16:53:47 +0000, Andrew Murray wrote:
> 
> > > So in fact the problem is indeed that the subnodes pcie0,0 and pcie1,0
> > > are seen as corresponding to the PCI-to-PCI bridges.
> > 
> > I would suggest changing the interrupt-mask to match any bus number. (Don't
> > forget that the secondary bus number of each of your emulated bridges will
> > vary depending on how many devices are detected underneath each root port,
> > assuming you don't try and partition bus numbers or use domains between ports).
> 
> I don't think this would work. Currently, the interrupt-map associates
> the interrupts with the PCI-to-PCI bridges, i.e devices 00:01, 00:02,
> 00:03, 00:04, 00:05, etc.
> 
> The real PCIe devices themselves are at 01:00, 02:00, 03:00, 04:00,
> 05:00. Each of them sit on a different bus, at devfn = 0.
> 
> So if I ignore the bus number, how could the PCI code find what is the
> matching interrupt?
Apologies if I've missed information about your hardware in the other
discussion (I've tried to keep up) - does your hardware raise a single host
interrupt for each pin regardless of which bridge they come in on - or do you
have separate A,B,C,D host interrupts for each bridge?
If you have only 4 interrupt sources for legacy interrupts then you shouldn't
need to care which bus/device/function they were generated on (of_irq_map_pci
takes care of this for you).
During enumeration an interrupt number should be assigned to each requesting
device which reflects the pin (after swizzling) which will arrive at the host
bridges. That interrupt number should be shared across all devices that
requested the same pin (after swizzling) - i.e. shared interrupts. So all you
need to do is map the A,B,C,D interrupts to the interrupts on which they
arrive at the CPU.
Andrew Murray
> 
> Thanks,
> 
> Thomas
> -- 
> Thomas Petazzoni, Free Electrons
> Kernel, drivers, real-time and embedded Linux
> development, consulting, training and support.
> http://free-electrons.com
> 
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 17:29                 ` Andrew Murray
@ 2013-02-07 17:37                   ` Thomas Petazzoni
  2013-02-07 18:21                     ` Jason Gunthorpe
  2013-02-07 18:30                     ` Andrew Murray
  0 siblings, 2 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-07 17:37 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Andrew Murray,
On Thu, 7 Feb 2013 17:29:34 +0000, Andrew Murray wrote:
> > So if I ignore the bus number, how could the PCI code find what is the
> > matching interrupt?
> 
> Apologies if I've missed information about your hardware in the other
> discussion (I've tried to keep up) - does your hardware raise a single host
> interrupt for each pin regardless to which bridge they come in on - or do you
> separate A,B,C,D host interrupts for each bridge?
There are separate A,B,C,D interrupts for each PCIe interface, and each
PCIe interface is represented by an emulated PCI-to-PCI bridge. See my
interrupt-map:
			interrupt-map = <0x0800 0 0 1 &mpic 58
				         0x1000 0 0 1 &mpic 59
					 0x1800 0 0 1 &mpic 60
					 0x2000 0 0 1 &mpic 61
					 0x2800 0 0 1 &mpic 62
				         0x3000 0 0 1 &mpic 63
					 0x3800 0 0 1 &mpic 64
					 0x4000 0 0 1 &mpic 65
					 0x4800 0 0 1 &mpic 99
					 0x5000 0 0 1 &mpic 103>;
Here I have 10 PCIe interfaces, and therefore 10 interrupts.
There is only one interrupt per PCIe interface, and for now, I don't
distinguish A,B,C,D (I will do it later, it requires reading a register
to know if the interrupt came from A, B, C or D, but that's a different
problem).
> If you have only 4 interrupt sources for legacy interrupts then you shouldn't
> need to care which bus/device/function they were generated on (of_pci_map_irq
> takes care of this for you).
No, I have interrupts per PCIe interface, so I really need to take care
of the relation between the PCIe device and the PCIe interface it is
connected to.
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 17:37                   ` Thomas Petazzoni
@ 2013-02-07 18:21                     ` Jason Gunthorpe
  2013-02-07 23:25                       ` Arnd Bergmann
  2013-02-07 18:30                     ` Andrew Murray
  1 sibling, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-07 18:21 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 07, 2013 at 06:37:43PM +0100, Thomas Petazzoni wrote:
> Dear Andrew Murray,
> 
> On Thu, 7 Feb 2013 17:29:34 +0000, Andrew Murray wrote:
> 
> > > So if I ignore the bus number, how could the PCI code find what is the
> > > matching interrupt?
> > 
> > Apologies if I've missed information about your hardware in the other
> > discussion (I've tried to keep up) - does your hardware raise a single host
> > interrupt for each pin regardless to which bridge they come in on - or do you
> > separate A,B,C,D host interrupts for each bridge?
> 
> There are separate A,B,C,D interrupts for each PCIe interface, and each
> PCIe interface is represented by an emulated PCI-to-PCI bridge. See my
> interrupt-map:
> 
> 			interrupt-map = <0x0800 0 0 1 &mpic 58
> 				         0x1000 0 0 1 &mpic 59
> 					 0x1800 0 0 1 &mpic 60
> 					 0x2000 0 0 1 &mpic 61
> 					 0x2800 0 0 1 &mpic 62
> 				         0x3000 0 0 1 &mpic 63
> 					 0x3800 0 0 1 &mpic 64
> 					 0x4000 0 0 1 &mpic 65
> 					 0x4800 0 0 1 &mpic 99
> 					 0x5000 0 0 1 &mpic 103>;
> 
> Here I have 10 PCIe interfaces, and therefore 10 interrupts.
>
> There is only one interrupt per PCIe interface, and for now, I don't
> distinguish A,B,C,D (I will do it later, it requires reading a register
> to know if the interrupt came from A, B, C or D, but that's a different
> problem).
Right, someday you can have all 40 interrupts :)
This interrupt-map stuff is ugly. It looks like it was designed as a
way for the firmware to communicate the per-device interrupt
assignment to the OS - using it to describe a HW setup is a bit more
tortured for sure..
Frankly, I think it should not be used in PCI-E drivers *at all*
because there is no need for an external interrupt routing
description. [1]
PCI-E *only* has inband interrupt delivery, so the host driver has
100% of the information it needs to convert a INTx signal received on
a link to a Linux interrupt number.
So in a PCI-E world translation should look like this:
 - Start at the source device INTx
 - Traverse up bridges performing INTx swizzling according to the
   PCI-E spec
 - When the host bridge is reached call into the host driver and pass
   - The INTx number
   - The bus/device/function of last bridge traversed
 - The host bridge driver returns a Linux interrupt number.
This *always* works for (compliant) PCI-E. An OF interrupt map is
never, ever, needed. Providing a common mechanism can remove this
complex OF stuff and ultimately help everyone :)
However, if interrupt-map is used, I strongly suspect it should be
placed on each bridge node (as Andrew mentioned), and it should be
very simple:
link at 0 {
reg = <0x800 0 0  0 0>; // Bus 0, Dev 1, Fn 0
interrupt-map-mask = <0x0 0 0 7>;
interrupt-map = <0x0000 0 0 1 &mpic 58 // INTA
                 0x0000 0 0 2 &mpic 58 // INTB
                 0x0000 0 0 3 &mpic 58 // INTC
                 0x0000 0 0 4 &mpic 58>; // INTD
}
Which hopefully says 'when you reach this bridge node, look only at
the INTx pin number and convert it'.
I don't think you can make interrupt-map work where you have placed
it..
Jason
1 - For PCI/PCI-X the physical INTx interrupt pins on any device,
    anywhere in the system could be physically tied to any interrupt
    ingress in the system. Even if they are behind bridges. It is a
    total free for all.
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 18:21                     ` Jason Gunthorpe
@ 2013-02-07 23:25                       ` Arnd Bergmann
  2013-02-08  0:44                         ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-07 23:25 UTC (permalink / raw)
  To: linux-arm-kernel
On Thursday 07 February 2013, Jason Gunthorpe wrote:
> On Thu, Feb 07, 2013 at 06:37:43PM +0100, Thomas Petazzoni wrote:
> Right, someday you can have all 40 interrupts :)
> 
> This interrupt-map stuff is ugly, I looks like it was designed as a
> way for the firmware to communicate the per-device interrupt
> assignment to the OS - using it to describe a HW setup is a bit more
> tortured for sure..
It is usually used for PCI (non-PCIe) systems, where you can have
multiple slots on the same bus and each slot has its own four
interrupt lines routed to an upstream interrupt controller, not
necessarily the same one for each slot.
> Frankly, I think it should not be used in PCI-E drivers *at all*
> beacuse there is no need for an external interrupt routing
> description. [1]
> 
> PCI-E *only* has inband interrupt delivery, so the host driver has
> 100% of the information it needs to convert a INTx signal received on
> a link to a Linux interrupt number.
> 
> So in a PCI-E world translation should look like this:
>  - Start at the source device INTx
>  - Traverse up bridges performing INTx swizzling according to the
>    PCI-E spec
>  - When the host bridge is reached call into the host driver and pass
>    - The INTx number
>    - The bus/device/function of last bridge traversed
>  - The host bridge driver returns a Linux interrupt number.
It's the same as on PCI, except that there you stop when you reach
the slot that is physically connected to the interrupt controller.
> This *always* works for (compliant) PCI-E. An OF interrupt map is
> never, ever, needed. Providing a common mechanism can remove this
> complex OF stuff and ultimately help everyone :)
> 
> However, if interrupt-map is used, I strongly suspect it should be
> placed on each bridge node (as Andrew mentioned), and it should be
> very simple:
> 
> link at 0 {
> reg = <0x800 0 0  0 0>; // Bus 0, Dev 0x10, Fn 0
> interrupt-mask = <0x0 0 0 7>;
> interrupt-map = <0x0000 0 0 1 &mpic 58 // INTA
>                  0x0000 0 0 2 &mpic 58 // INTB
>                  0x0000 0 0 3 &mpic 58 // INTC
>                  0x0000 0 0 4 &mpic 58>; // INTD
> }
The interrupt-map property only makes sense for the host bridge,
not for bridges below it, which don't normally get represented
in the device tree.
> Jason
> 1 - For PCI/PCI-X the physical INTx interrupt pins on any device,
>     anywhere in the system could by physically tied to any interrupt
>     ingress in the system. Even if they are behind bridges. It is a
>     total free for all.
I would not be surprised to see systems that have a PCIe to
PCI bridge and slots behind that which are directly connected
to an upstream interrupt controller. In fact, I believe I have
worked on a system like that (IBM QS21 and QS22), and thankfully
the interrupt-map property did let us model this correctly.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 23:25                       ` Arnd Bergmann
@ 2013-02-08  0:44                         ` Jason Gunthorpe
  2013-02-09 22:23                           ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-08  0:44 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 07, 2013 at 11:25:23PM +0000, Arnd Bergmann wrote:
> > link at 0 {
> > reg = <0x800 0 0  0 0>; // Bus 0, Dev 0x10, Fn 0
> > interrupt-mask = <0x0 0 0 7>;
> > interrupt-map = <0x0000 0 0 1 &mpic 58 // INTA
> >                  0x0000 0 0 2 &mpic 58 // INTB
> >                  0x0000 0 0 3 &mpic 58 // INTC
> >                  0x0000 0 0 4 &mpic 58>; // INTD
> > }
> 
> The interrupt-map property only makes sense for the host bridge,
> not for bridges below it, which don't normally get represented
> in the device tree.
Linux scans up the PCI bus until it finds a PCI device with a matching
OF node. It then constructs an interrupt map 'laddr' (ie the
bus:dev.fn) for the child device of this OF node.
If you don't have any DT PCI nodes then this should always fold down
to doing a lookup with bus=0, and device representing the 'slot' in
legacy PCI.
However, as soon as you provide a node for a bridge in DT this halts
the 'fold down' and goes to the interrupt-map with a device on the
subordinate bus number of the bridge.
This makes *lots* of sense, if you have bridges providing bus slots
then you include the bridge in DT to stop the 'fold down' at that
known bridge, giving you a chance to see the interrupt wiring behind
the bridge.
This matches the design of PCI - if you know how interrupts are hooked
up then use that information, otherwise assume the INTx interrupts
swizzle and search upward. This is how add-in cards with PCI bridges
are supported.
This behavior seems complex, but sane to me. I wouldn't change it as
Andrew suggested.
Thomas's problem is the presence of the static DT node for the root
port bridge. Since the node is static you can't know what the runtime
determined subordinate bus numbers will be, so there is no possible
way to write an interrupt-map at the host bridge.
Putting the map in the bridge's DT node seems elegant and correct to
me - the map is describing the actual hardware - the root port bridge
is actually terminating INTx from downstream devices and converting
them to CPU interrupts. (FWIW discrete HT to PCIe bridges do something
similar)
If you imagine the case you alluded to, a PCI-E root port, connected
to a PCI-E to PCI bridge, with 2 physical PCI bus slots. The
interrupts for the 2 slots are routed to the CPU directly:
link at 0 {
 reg = </* Bus 0, Dev 0x10, Fn 0 */>; // Root Port bridge
  // Match on INTx (not used since the pci-bridge doesn't create inband INTx)
  interrupt-mask = <0x0 0 0 7>;
  interrupt-map = <0x0000 0 0 1 &pic 0  // Inband INTA
                   0x0000 0 0 2 &pic 1  // Inband INTB
		   ..
 pci_bridge@0 {
    reg = </* Bus 1, Dev 0x10, Fn 0 */>; // PCIe to PCI bridge
  
    // Match on the device/slot and INTx pin
    interrupt-mask = <0x7f 0 0 7>;
    interrupt-map = <0x00xx 0 0 1 &pic 2 // Slot 0 physical INTA
                     0x00xx 0 0 1 &pic 3 // Slot 1 physical INTA
                     ..
 }
}
To me, this seems to be a much more accurate description of how the
hardware is constructed than trying to cram all this information into
the host bridge's interrupt map. It shows clearly where inband INTA
messages arriving at the root port are directed as well as where the
slot by slot out-of-band interrupt wires on the PCI bus are directed.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-08  0:44                         ` Jason Gunthorpe
@ 2013-02-09 22:23                           ` Arnd Bergmann
  2013-02-12 19:26                             ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-09 22:23 UTC (permalink / raw)
  To: linux-arm-kernel
On Friday 08 February 2013, Jason Gunthorpe wrote:
> On Thu, Feb 07, 2013 at 11:25:23PM +0000, Arnd Bergmann wrote:
> > > link at 0 {
> > > reg = <0x800 0 0  0 0>; // Bus 0, Dev 0x10, Fn 0
> > > interrupt-mask = <0x0 0 0 7>;
> > > interrupt-map = <0x0000 0 0 1 &mpic 58 // INTA
> > >                  0x0000 0 0 2 &mpic 58 // INTB
> > >                  0x0000 0 0 3 &mpic 58 // INTC
> > >                  0x0000 0 0 4 &mpic 58>; // INTD
> > > }
> > 
> > The interrupt-map property only makes sense for the host bridge,
> > not for bridges below it, which don't normally get represented
> > in the device tree.
> 
> Linux scans up the PCI bus until it finds a PCI device with a matching
> OF node. It then constructs an interrupt map 'laddr' (ie the
> bus:dev.fn) for the child device of this OF node.
> 
> If you don't have any DT PCI nodes then this should always fold down
> to doing a lookup with bus=0, and device representing the 'slot' in
> legacy PCI.
> 
> However, as soon as you provide a node for a bridge in DT this halts
> the 'fold down' and goes to the interrupt-map with a device on the
> subordinate bus number of the bridge.
> 
> This makes *lots* of sense, if you have bridges providing bus slots
> then you include the bridge in DT to stop the 'fold down' at that
> known bridge, giving you a chance to see the interrupt wiring behind
> the bridge.
I would argue that it matters not so much what Linux does but what
the standard says, but it seems they both agree with you in this
case: http://www.openfirmware.org/1275/practice/imap/imap0_9d.pdf
defines that "At any level in the interrupt tree, a mapping may
need to take place between the child interrupt domain and the
parent's. This is represented by a new property called 'interrupt-map'".
> This matches the design of PCI - if you know how interrupts are hooked
> up then use that information, otherwise assume the INTx interrupts
> swizzle and search upward. This is how add-in cards with PCI bridges
> are supported.
Note that the implicit swizzling was not part of the original PCI
binding, which assumed that all devices were explicitly represented
in the device tree, and we don't normally do that any more because
PCI can be probed easily, and we cannot assume that all PCI BARs
have been correctly assigned by the firmware before the OS
is booting. Having the interrupt-map at PCI host controller
node is convenient because it lets us define unit interrupt
specifiers for devices that are not represented in the device
tree themselves.
I think the key question here is whether there is just one interrupt
domain across all bridges because the hardware requires the unit
address to be unique, or whether each PCIe port has its own
unit address space, and thereby interrupt domain that requires
its own interrupt-map.
If there is just one domain, we have the choice whether to have
one interrupt-map for the entire domain, or to have one
interrupt map per PCIe port for the devices under that port.
I would consider it more logical to have a single interrupt-map
for the interrupt domain, because that is essentially what
lets us describe the interrupt domain as a whole.
Of course, if each port has its own domain, we have to have
a separate interrupt map for each one.
> Thomas's problem is the presence of the static DT node for the root
> port bridge. Since the node is static you can't know what the runtime
> determined subordinate bus numbers will be, so there is no possible
> way to write an interrupt-map at the host bridge.
Right, that is a problem if there are additional bridges. I guess
we could represent all devices on bus 0 easily because their
address would be fixed,  but can't uniquely identify anything
below them.
> If you imagine the case you alluded to, a PCI-E root port, connected
> to a PCI-E to PCI bridge, with 2 physical PCI bus slots. The
> interrupts for the 2 slots are routed to the CPU directly:
> 
> link at 0 {
>  reg = </* Bus 0, Dev 0x10, Fn 0 */>; // Root Port bridge
> 
>   // Match on INTx (not used since the pci-bridge doesn't create inband INTx)
>   interrupt-mask = <0x0 0 0 7>;
>   interrupt-map = <0x0000 0 0 1 &pic 0  // Inband INTA
>                    0x0000 0 0 2 &pic 1  // Inband INTB
What are these two interrupts in the example then?
>  pci_bridge at 0 {
>     reg = </* Bus 1, Dev 0x10, Fn 0 */>; // PCIe to PCI bridge
The device would be "pci at 10", right?
>     // Match on the device/slot and INTx pin
>     interrupt-mask = <0x7f 0 0 7>;
>     interrupt-map = <0x00xx 0 0 1 &pic 2 // Slot 0 physical INTA
>                      0x00xx 0 0 1 &pic 3 // Slot 1 physical INTA
>                      ..
>  }
> }
You are accidentally matching on the register number, not the
device number here, right? The interrupt-map-mask should be
<0xf800 0 0 7> to match the device.
> To me, this seems to be a much more accurate description of how the
> hardware is constructed then trying to cram all this information into
> the host bridge's interrupt map. It shows clearly where inband INTA
> messages arriving at the root port are directed as well as where the
> slot by slot out-of-band interrupt wires on the PCI bus are directed.
Yes, I guess you're right.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-09 22:23                           ` Arnd Bergmann
@ 2013-02-12 19:26                             ` Jason Gunthorpe
  0 siblings, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-12 19:26 UTC (permalink / raw)
  To: linux-arm-kernel
On Sat, Feb 09, 2013 at 10:23:11PM +0000, Arnd Bergmann wrote:
> > This makes *lots* of sense, if you have bridges providing bus slots
> > then you include the bridge in DT to stop the 'fold down' at that
> > known bridge, giving you a chance to see the interrupt wiring behind
> > the bridge.
>
> I would argue that it matters not so much what Linux does but what
> the standard says, but it seems they both agree with you in this
> case: http://www.openfirmware.org/1275/practice/imap/imap0_9d.pdf
> defines that "At any level in the interrupt tree, a mapping may
> need to take place between the child interrupt domain and the
> parent???s. This is represented by a new property called 'interrupt-map'".
Right, the standard (and Linux) allows the property in any node.
> > This matches the design of PCI - if you know how interrupts are hooked
> > up then use that information, otherwise assume the INTx interrupts
> > swizzle and search upward. This is how add-in cards with PCI bridges
> > are supported.
> 
> Note that the implicit swizzling was not part of the original PCI
> binding, which assumed that all devices were explicitly represented
> in the device tree, and we don't normally do that any more because
Yes, if the tree includes all PCI devices then you don't need
interrupt-map, just place an interrupt property directly on the end
nodes.
However the implicit swizzling and 'fold down' is pretty much
essential to support the PCI standards for hot plug behind bridges.
> PCI can be probed easily, and we cannot assume that all PCI BARs
> have been correctly assigned by the firmware before the OS
> is booting. Having the interrupt-map at PCI host controller
> node is convenient because it lets us define unit interrupt
> specifiers for devices that are not represented in the device
> tree themselves.
Right, interrupt-map is actually only needed when the DT is
incomplete, or to support hot pluggable ports.
> I think the key question here is whether there is just one interrupt
> domain across all bridges because the hardware requires the unit
> address to be unique, or whether each PCIe port has its own
> unit address space, and thereby interrupt domain that requires
> its own interrupt-map.
IMHO, the interrupt domains should describe the underlying hardware,
and in many cases the HW is designed so that every PCI bus bridge has
its own downstream interrupt layout - and thus is an interrupt domain.
> > If you imagine the case you alluded to, a PCI-E root port, connected
> > to a PCI-E to PCI bridge, with 2 physical PCI bus slots. The
> > interrupts for the 2 slots are routed to the CPU directly:
> > 
> > link at 0 {
> >  reg = </* Bus 0, Dev 0x10, Fn 0 */>; // Root Port bridge
> > 
> >   // Match on INTx (not used since the pci-bridge doesn't create inband INTx)
> >   interrupt-mask = <0x0 0 0 7>;
> >   interrupt-map = <0x0000 0 0 1 &pic 0  // Inband INTA
> >                    0x0000 0 0 2 &pic 1  // Inband INTB
> 
> What are these two interrupts in the example then?
This shows that the HW block 'link at 0' - which is a PCI Express root
port bridge - accepts inband INTx messages and converts them to CPU
interrupts pic 0/1/...
Since this is a general function, and fully self contained, it can be
placed in the general SOC's dtsi.
However, the board has a hard-wired PCIe to PCI bridge with PCI slots,
and never generates inband INTx. We can then describe that chip via
the following stanza in the board specific dts:
> >  pci_bridge at 0 {
> >     reg = </* Bus 1, Dev 0x10, Fn 0 */>; // PCIe to PCI bridge
> 
> The device would be "pci at 10", right?
Probably best to use the hex version of the regs value /* Bus 1, Dev
0x10, Fn 0 */, but nothing inspects that, right?
> >     // Match on the device/slot and INTx pin
> >     interrupt-mask = <0x7f 0 0 7>;
> >     interrupt-map = <0x00xx 0 0 1 &pic 2 // Slot 0 physical INTA
> >                      0x00xx 0 0 1 &pic 3 // Slot 1 physical INTA
> >                      ..
> >  }
> > }
> 
> You are accidentally matching the on the register number, not the
> device number here, right? The interrupt-map-mask should be
> <0xf800 0 0 7> to match the device.
Right, only match the device, ignore the bus.
There is also another variant, if the PCIe to PCI bridge provides its
own interrupt pins and converts those to inband PCIe INTx messages,
then the PCB can wire up the PCI bus slots to the bridge's INTx pins
according to some pattern and describe that pattern in DT:
pci_bridge at 0 {
   reg = </* Bus 1, Dev 0x10, Fn 0 */>; // PCIe to PCI bridge
   interrupt-mask = <0xf800 0 0 7>;
   interrupt-map = <0x00xx 0 0 1 &pci_bridge0 0 0 0 1 // Slot 0 physical INTA to inband INTA
                    0x00xx 0 0 1 &pci_bridge0 0 0 0 2 // Slot 1 physical INTA to inband INTB
		    ...
}
(minus errors, haven't tried this one, but the standard says it should
be OK)
Which would be processed as:
 - pci_bridge at 0 converts out-of-band interrupts into in-band
   interrupts according to its interrupt-map, and then sends those
   upstream.
 - link@0 converts in band interrupts into CPU interrupts according
   to its interrupt map.
In my experience the above is a common case.
Boot firmware could fold all this down to a single interrupt map, and
hide the programming of the IOAPIC/etc from the OS, but the HW is
still undertaking these transformations..
Anyhow, it sounds like Thomas has had success using this approach, so
it works.
Cheers,
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread
 
 
 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 17:37                   ` Thomas Petazzoni
  2013-02-07 18:21                     ` Jason Gunthorpe
@ 2013-02-07 18:30                     ` Andrew Murray
  2013-02-07 23:27                       ` Arnd Bergmann
  1 sibling, 1 reply; 216+ messages in thread
From: Andrew Murray @ 2013-02-07 18:30 UTC (permalink / raw)
  To: linux-arm-kernel
On 7 February 2013 17:37, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Dear Andrew Murray,
>
> On Thu, 7 Feb 2013 17:29:34 +0000, Andrew Murray wrote:
>
>> > So if I ignore the bus number, how could the PCI code find what is the
>> > matching interrupt?
>>
>> Apologies if I've missed information about your hardware in the other
>> discussion (I've tried to keep up) - does your hardware raise a single host
>> interrupt for each pin regardless to which bridge they come in on - or do you
>> separate A,B,C,D host interrupts for each bridge?
>
> There are separate A,B,C,D interrupts for each PCIe interface, and each
> PCIe interface is represented by an emulated PCI-to-PCI bridge. See my
> interrupt-map:
>
>                         interrupt-map = <0x0800 0 0 1 &mpic 58
>                                          0x1000 0 0 1 &mpic 59
>                                          0x1800 0 0 1 &mpic 60
>                                          0x2000 0 0 1 &mpic 61
>                                          0x2800 0 0 1 &mpic 62
>                                          0x3000 0 0 1 &mpic 63
>                                          0x3800 0 0 1 &mpic 64
>                                          0x4000 0 0 1 &mpic 65
>                                          0x4800 0 0 1 &mpic 99
>                                          0x5000 0 0 1 &mpic 103>;
>
> Here I have 10 PCIe interfaces, and therefore 10 interrupts.
>
> There is only one interrupt per PCIe interface, and for now, I don't
> distinguish A,B,C,D (I will do it later, it requires reading a register
> to know if the interrupt came from A, B, C or D, but that's a different
> problem).
>
>> If you have only 4 interrupt sources for legacy interrupts then you shouldn't
>> need to care which bus/device/function they were generated on (of_pci_map_irq
>> takes care of this for you).
>
> No, I have interrupts per PCIe interface, so I really need to take care
> of the relation between the PCIe device and the PCIe interface it is
> connected to.
In that case, I think you can create a mask that only checks for the
device number and INT pin (i.e. ignore bus and function). Looking at
your mask - it already does this...
                         interrupt-map = <0x0800 0 0 1 &mpic 58
                                          0x1000 0 0 1 &mpic 59
                                          0x1800 0 0 1 &mpic 60
                                          0x2000 0 0 1 &mpic 61
                                          0x2800 0 0 1 &mpic 62
                                          0x3000 0 0 1 &mpic 63
                                          0x3800 0 0 1 &mpic 64
                                          0x4000 0 0 1 &mpic 65
                                          0x4800 0 0 1 &mpic 99
                                          0x5000 0 0 1 &mpic 103>;
I'm not sure if the device pin part of the map is correct (I always
forget how this works) - but I know this would definitely work:
                         interrupt-map-mask = <0xf800 0 0 7>
                         interrupt-map = <0x0800 0 0 1 &mpic 58
                                          0x0800 0 0 2 &mpic 58
                                          0x0800 0 0 3 &mpic 58
                                          0x0800 0 0 4 &mpic 58
                                          0x1000 0 0 1 &mpic 59
                                          0x1000 0 0 2 &mpic 59
                                          0x1000 0 0 3 &mpic 59
                                          0x1000 0 0 4 &mpic 59
                                          ....
In any case, I've realized that my original suggestion of changing the
map won't quite do (apologies). This is because the OF code won't even
look at this map as it stops at the emulated bridge below. In addition
to this type of mapping - you'll also need to investigate my solution
1 and 2.
Andrew Murray
>
> Thomas
> --
> Thomas Petazzoni, Free Electrons
> Kernel, drivers, real-time and embedded Linux
> development, consulting, training and support.
> http://free-electrons.com
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-07 18:30                     ` Andrew Murray
@ 2013-02-07 23:27                       ` Arnd Bergmann
  0 siblings, 0 replies; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-07 23:27 UTC (permalink / raw)
  To: linux-arm-kernel
On Thursday 07 February 2013, Andrew Murray wrote:
> I'm not sure if the device pin part of the map is correct (I always
> forget how this works) - but I know this would definately work:
> 
>                          interrupt-map-mask = <0xf800 0 0 7>
>                          interrupt-map = <0x0800 0 0 1 &mpic 58
>                                           0x0800 0 0 2 &mpic 58
>                                           0x0800 0 0 3 &mpic 58
>                                           0x0800 0 0 4 &mpic 58
>                                           0x1000 0 0 1 &mpic 59
>                                           0x1000 0 0 2 &mpic 59
>                                           0x1000 0 0 3 &mpic 59
>                                           0x1000 0 0 4 &mpic 59
>                                           ....
If all the pins are routed to the same interrupt, you can leave
the pin out of the map-mask and save a bunch of lines in the
interrupt-map, at least  by the spec. I have not looked at the
source code to see if that is how Linux implements the lookup,
but that can be fixed if necessary.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
 
 
 
 
 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-28 18:56 ` [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems Thomas Petazzoni
                     ` (2 preceding siblings ...)
  2013-01-29 13:22   ` Andrew Murray
@ 2013-01-30 11:32   ` Russell King - ARM Linux
  2013-01-30 11:37     ` Thomas Petazzoni
  2013-01-30 12:03     ` Thierry Reding
  2013-02-01  0:34   ` Stephen Warren
  4 siblings, 2 replies; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-30 11:32 UTC (permalink / raw)
  To: linux-arm-kernel
On Mon, Jan 28, 2013 at 07:56:28PM +0100, Thomas Petazzoni wrote:
> +static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
> +						 const struct resource *res,
> +						 resource_size_t start,
> +						 resource_size_t size,
> +						 resource_size_t align)
> +{
> +	if (!(res->flags & IORESOURCE_IO))
> +		return start;
> +
> +	/*
> +	 * The I/O regions must be 64K aligned, because the
> +	 * granularity of PCIe I/O address decoding windows is 64 K
> +	 */
> +	return round_up(start, SZ_64K);
> +}
You do realise that this will result in all PCI I/O BARs being rounded
up to 64K.
I've just been digging through the PCI code and have come across a
function - pcibios_window_alignment() - which the PCI code allows to be
overridden which allows you to increase the alignment requirement of
bridge windows.  It takes the PCI bus and window type as arguments.
I'd suggest using that, and checking whether the bus which is passed
corresponds with a bus which gives you problems, so that you don't
impose the 64K requirement on downstream bridges.
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 11:32   ` Russell King - ARM Linux
@ 2013-01-30 11:37     ` Thomas Petazzoni
  2013-01-30 12:03     ` Thierry Reding
  1 sibling, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-30 11:37 UTC (permalink / raw)
  To: linux-arm-kernel
Russell,
On Wed, 30 Jan 2013 11:32:46 +0000, Russell King - ARM Linux wrote:
> You do realise that this will result in all PCI I/O BARs being rounded
> up to 64K.
Hum, yes, correct.
> I've just been digging through the PCI code and have come across a
> function - pcibios_window_alignment() - which the PCI code allows to be
> overriden which allows you to increase the alignment requirement of
> bridge windows.  It takes the PCI bus and window type as arguments.
> 
> I'd suggest using that, and checking whether the bus which is passed
> corresponds with a bus which gives you problems, so that you don't
> impose the 64K requirement on downstream bridges.
Seems interesting indeed, I'll look into this idea! Thanks!
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 11:32   ` Russell King - ARM Linux
  2013-01-30 11:37     ` Thomas Petazzoni
@ 2013-01-30 12:03     ` Thierry Reding
  2013-01-30 13:07       ` Thomas Petazzoni
  2013-01-30 15:08       ` Russell King - ARM Linux
  1 sibling, 2 replies; 216+ messages in thread
From: Thierry Reding @ 2013-01-30 12:03 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Jan 30, 2013 at 11:32:46AM +0000, Russell King - ARM Linux wrote:
> On Mon, Jan 28, 2013 at 07:56:28PM +0100, Thomas Petazzoni wrote:
> > +static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
> > +						 const struct resource *res,
> > +						 resource_size_t start,
> > +						 resource_size_t size,
> > +						 resource_size_t align)
> > +{
> > +	if (!(res->flags & IORESOURCE_IO))
> > +		return start;
> > +
> > +	/*
> > +	 * The I/O regions must be 64K aligned, because the
> > +	 * granularity of PCIe I/O address decoding windows is 64 K
> > +	 */
> > +	return round_up(start, SZ_64K);
> > +}
> 
> You do realise that this will result in all PCI I/O BARs being rounded
> up to 64K.
> 
> I've just been digging through the PCI code and have come across a
> function - pcibios_window_alignment() - which the PCI code allows to be
> overriden which allows you to increase the alignment requirement of
> bridge windows.  It takes the PCI bus and window type as arguments.
> 
> I'd suggest using that, and checking whether the bus which is passed
> corresponds with a bus which gives you problems, so that you don't
> impose the 64K requirement on downstream bridges.
That approach isn't going to work very well with multi-platform, though,
since the function can only be overridden on a per-architecture basis.
Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 836 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20130130/74cffa44/attachment.sig>
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 12:03     ` Thierry Reding
@ 2013-01-30 13:07       ` Thomas Petazzoni
  2013-01-30 15:08       ` Russell King - ARM Linux
  1 sibling, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-30 13:07 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Thierry Reding,
On Wed, 30 Jan 2013 13:03:44 +0100, Thierry Reding wrote:
> That approach isn't going to work very well with multi-platform,
> though, since the function can only be overridden on a
> per-architecture basis.
I can do like is done for pcibios_align_resource(): put a single
implementation of the function in arch/arm/kernel/bios32.c, and make it
call a hook registered in the hw_pci structure, or even directly use a
numerical value in hw_pci, as Russell suggested earlier.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 12:03     ` Thierry Reding
  2013-01-30 13:07       ` Thomas Petazzoni
@ 2013-01-30 15:08       ` Russell King - ARM Linux
  2013-01-30 15:19         ` Russell King - ARM Linux
  1 sibling, 1 reply; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-30 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Jan 30, 2013 at 01:03:44PM +0100, Thierry Reding wrote:
> On Wed, Jan 30, 2013 at 11:32:46AM +0000, Russell King - ARM Linux wrote:
> > On Mon, Jan 28, 2013 at 07:56:28PM +0100, Thomas Petazzoni wrote:
> > > +static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
> > > +						 const struct resource *res,
> > > +						 resource_size_t start,
> > > +						 resource_size_t size,
> > > +						 resource_size_t align)
> > > +{
> > > +	if (!(res->flags & IORESOURCE_IO))
> > > +		return start;
> > > +
> > > +	/*
> > > +	 * The I/O regions must be 64K aligned, because the
> > > +	 * granularity of PCIe I/O address decoding windows is 64 K
> > > +	 */
> > > +	return round_up(start, SZ_64K);
> > > +}
> > 
> > You do realise that this will result in all PCI I/O BARs being rounded
> > up to 64K.
> > 
> > I've just been digging through the PCI code and have come across a
> > function - pcibios_window_alignment() - which the PCI code allows to be
> > overriden which allows you to increase the alignment requirement of
> > bridge windows.  It takes the PCI bus and window type as arguments.
> > 
> > I'd suggest using that, and checking whether the bus which is passed
> > corresponds with a bus which gives you problems, so that you don't
> > impose the 64K requirement on downstream bridges.
> 
> That approach isn't going to work very well with multi-platform, though,
> since the function can only be overridden on a per-architecture basis.
The same can be said of all the various other functions which the PCI
stuff expects the arch to provide, yet we seem to cope just fine...
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 15:08       ` Russell King - ARM Linux
@ 2013-01-30 15:19         ` Russell King - ARM Linux
  2013-01-30 15:36           ` Thomas Petazzoni
  2013-01-31  7:10           ` [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems Thierry Reding
  0 siblings, 2 replies; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-30 15:19 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Jan 30, 2013 at 03:08:56PM +0000, Russell King - ARM Linux wrote:
> On Wed, Jan 30, 2013 at 01:03:44PM +0100, Thierry Reding wrote:
> > On Wed, Jan 30, 2013 at 11:32:46AM +0000, Russell King - ARM Linux wrote:
> > > On Mon, Jan 28, 2013 at 07:56:28PM +0100, Thomas Petazzoni wrote:
> > > > +static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
> > > > +						 const struct resource *res,
> > > > +						 resource_size_t start,
> > > > +						 resource_size_t size,
> > > > +						 resource_size_t align)
> > > > +{
> > > > +	if (!(res->flags & IORESOURCE_IO))
> > > > +		return start;
> > > > +
> > > > +	/*
> > > > +	 * The I/O regions must be 64K aligned, because the
> > > > +	 * granularity of PCIe I/O address decoding windows is 64 K
> > > > +	 */
> > > > +	return round_up(start, SZ_64K);
> > > > +}
> > > 
> > > You do realise that this will result in all PCI I/O BARs being rounded
> > > up to 64K.
> > > 
> > > I've just been digging through the PCI code and have come across a
> > > function - pcibios_window_alignment() - which the PCI code allows to be
> > > overriden which allows you to increase the alignment requirement of
> > > bridge windows.  It takes the PCI bus and window type as arguments.
> > > 
> > > I'd suggest using that, and checking whether the bus which is passed
> > > corresponds with a bus which gives you problems, so that you don't
> > > impose the 64K requirement on downstream bridges.
> > 
> > That approach isn't going to work very well with multi-platform, though,
> > since the function can only be overridden on a per-architecture basis.
> 
> The same can be said of all the various other functions which the PCI
> stuff expects the arch to provide, yet we seem to cope just fine...
And this (untested) is how it's done:
 arch/arm/include/asm/mach/pci.h |    1 +
 arch/arm/kernel/bios32.c        |    8 ++++++++
 2 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h
index db9fedb..bba0cf3 100644
--- a/arch/arm/include/asm/mach/pci.h
+++ b/arch/arm/include/asm/mach/pci.h
@@ -29,6 +29,7 @@ struct hw_pci {
 	void		(*postinit)(void);
 	u8		(*swizzle)(struct pci_dev *dev, u8 *pin);
 	int		(*map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
+	resource_size_t	(*window_align)(struct pci_bus *, unsigned long);
 };
 
 /*
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 379cf32..32c3bd9 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -581,6 +581,14 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 	return start;
 }
 
+resource_size_t pcibios_window_alignment(struct pci_bus *bus,
+					 unsigned long type)
+{
+	struct pci_sys_data *sys = bus->sysdata;
+
+	return sys->window_alignment ? sys->window_alignment(bus, type) : 1;
+}
+
 /**
  * pcibios_enable_device - Enable I/O and memory.
  * @dev: PCI device to be enabled
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 15:19         ` Russell King - ARM Linux
@ 2013-01-30 15:36           ` Thomas Petazzoni
  2013-01-30 15:46             ` Russell King - ARM Linux
  2013-01-31  7:10           ` [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems Thierry Reding
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-30 15:36 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Russell King - ARM Linux,
On Wed, 30 Jan 2013 15:19:34 +0000, Russell King - ARM Linux wrote:
> diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h
> index db9fedb..bba0cf3 100644
> --- a/arch/arm/include/asm/mach/pci.h
> +++ b/arch/arm/include/asm/mach/pci.h
> @@ -29,6 +29,7 @@ struct hw_pci {
>  	void		(*postinit)(void);
>  	u8		(*swizzle)(struct pci_dev *dev, u8 *pin);
>  	int		(*map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
> +	resource_size_t	(*window_align)(struct pci_bus *, unsigned long);
>  };
Didn't you say just yesterday that you would prefer a numerical value
rather than a hook that could do random things? :-)
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 15:36           ` Thomas Petazzoni
@ 2013-01-30 15:46             ` Russell King - ARM Linux
  2013-01-31 14:30               ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-30 15:46 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Jan 30, 2013 at 04:36:59PM +0100, Thomas Petazzoni wrote:
> Dear Russell King - ARM Linux,
> 
> On Wed, 30 Jan 2013 15:19:34 +0000, Russell King - ARM Linux wrote:
> > diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h
> > index db9fedb..bba0cf3 100644
> > --- a/arch/arm/include/asm/mach/pci.h
> > +++ b/arch/arm/include/asm/mach/pci.h
> > @@ -29,6 +29,7 @@ struct hw_pci {
> >  	void		(*postinit)(void);
> >  	u8		(*swizzle)(struct pci_dev *dev, u8 *pin);
> >  	int		(*map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
> > +	resource_size_t	(*window_align)(struct pci_bus *, unsigned long);
> >  };
> 
> Didn't you say just yesterday that you would prefer a numerical value
> rather than a hook that could do random things? :-)
Hrh, right. :)
And it also helps if I modify the right struct too!  Try this instead.
Overwrite sys->win_align_mem / sys->win_align_io in the setup function
as required.
 arch/arm/include/asm/mach/pci.h |    2 ++
 arch/arm/kernel/bios32.c        |   17 +++++++++++++++++
 2 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h
index db9fedb..a2301ae 100644
--- a/arch/arm/include/asm/mach/pci.h
+++ b/arch/arm/include/asm/mach/pci.h
@@ -42,6 +42,8 @@ struct pci_sys_data {
 	int		busnr;		/* primary bus number			*/
 	u64		mem_offset;	/* bus->cpu memory mapping offset	*/
 	unsigned long	io_offset;	/* bus->cpu IO mapping offset		*/
+	resource_size_t	win_align_mem;
+	resource_size_t	win_align_io;
 	struct pci_bus	*bus;		/* PCI bus				*/
 	struct list_head resources;	/* root bus resources (apertures)       */
 	struct resource io_res;
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 379cf32..ba81630 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -463,6 +463,8 @@ static void __init pcibios_init_hw(struct hw_pci *hw, struct list_head *head)
 		sys->swizzle = hw->swizzle;
 		sys->map_irq = hw->map_irq;
 		INIT_LIST_HEAD(&sys->resources);
+		sys->win_align_mem = 1;
+		sys->win_align_io = 1;
 
 		ret = hw->setup(nr, sys);
 
@@ -581,6 +583,21 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 	return start;
 }
 
+resource_size_t pcibios_window_alignment(struct pci_bus *bus,
+					 unsigned long type)
+{
+	struct pci_sys_data *sys = bus->sysdata;
+
+	/* Ignore downstream buses */
+	if (!bus->parent) {
+		if (type & IORESOURCE_MEM)
+			return sys->win_align_mem;
+		if (type & IORESOURCE_IO)
+			return sys->win_align_io;
+	}
+	return 1;
+}
+
 /**
  * pcibios_enable_device - Enable I/O and memory.
  * @dev: PCI device to be enabled
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 15:46             ` Russell King - ARM Linux
@ 2013-01-31 14:30               ` Thomas Petazzoni
  2013-01-31 14:50                 ` Russell King - ARM Linux
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-31 14:30 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Russell King - ARM Linux,
On Wed, 30 Jan 2013 15:46:02 +0000, Russell King - ARM Linux wrote:
> +resource_size_t pcibios_window_alignment(struct pci_bus *bus,
> +					 unsigned long type)
> +{
> +	struct pci_sys_data *sys = bus->sysdata;
> +
> +	/* Ignore downstream buses */
> +	if (!bus->parent) {
> +		if (type & IORESOURCE_MEM)
> +			return sys->win_align_mem;
> +		if (type & IORESOURCE_IO)
> +			return sys->win_align_io;
> +	}
> +	return 1;
> +}
> +
Unfortunately, this doesn't work as is for me: the if (!bus->parent)
prevents the thing from being effective. Here is my lspci output:
# /usr/sbin/lspci 
00:00.0 Host bridge: Marvell Technology Group Ltd. Device 102d
00:01.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
00:02.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
00:03.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
00:04.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
00:05.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
00:06.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
03:00.0 SCSI storage controller: Marvell Technology Group Ltd. 88SX7042 PCI-e 4-port SATA-II (rev 02)
05:00.0 Ethernet controller: Intel Corporation 82572EI Gigabit Ethernet Controller (Copper) (rev 06)
And the function pcibios_window_alignment() only gets called for bus 1,
2, 3, 4, 5, 6 and never for bus 0. And therefore, the !bus->parent test
is always false. So, if I keep your implementation, I have the following
addresses assigned to my bridges:
pci 0000:00:03.0: PCI bridge to [bus 03]
pci 0000:00:03.0:   bridge window [io  0xc0000000-0xc0000fff]
pci 0000:00:03.0:   bridge window [mem 0xc1000000-0xc10fffff]
pci 0000:00:03.0:   bridge window [mem 0xc1100000-0xc11fffff pref]
pci 0000:00:05.0: PCI bridge to [bus 05]
pci 0000:00:05.0:   bridge window [io  0xc0001000-0xc0001fff]
pci 0000:00:05.0:   bridge window [mem 0xc1200000-0xc12fffff]
pci 0000:00:05.0:   bridge window [mem 0xc1300000-0xc13fffff pref]
Notice how the io window of the second bridge starts 4K after the io
window of the first bridge. Which cannot work on Marvell SoC, due to
the 64KB alignment.
If, however, I remove the !bus->parent test, the I/O addresses
correctly take into account the 64K requirement:
pci 0000:00:03.0: PCI bridge to [bus 03]
pci 0000:00:03.0:   bridge window [io  0xc0000000-0xc000ffff]
pci 0000:00:03.0:   bridge window [mem 0xc1000000-0xc10fffff]
pci 0000:00:03.0:   bridge window [mem 0xc1100000-0xc11fffff pref]
pci 0000:00:05.0: PCI bridge to [bus 05]
pci 0000:00:05.0:   bridge window [io  0xc0010000-0xc001ffff]
pci 0000:00:05.0:   bridge window [mem 0xc1200000-0xc12fffff]
pci 0000:00:05.0:   bridge window [mem 0xc1300000-0xc13fffff pref]
Any idea?
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 14:30               ` Thomas Petazzoni
@ 2013-01-31 14:50                 ` Russell King - ARM Linux
  2013-01-31 14:57                   ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-31 14:50 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 03:30:41PM +0100, Thomas Petazzoni wrote:
> Dear Russell King - ARM Linux,
> 
> On Wed, 30 Jan 2013 15:46:02 +0000, Russell King - ARM Linux wrote:
> 
> > +resource_size_t pcibios_window_alignment(struct pci_bus *bus,
> > +					 unsigned long type)
> > +{
> > +	struct pci_sys_data *sys = bus->sysdata;
> > +
> > +	/* Ignore downstream buses */
> > +	if (!bus->parent) {
> > +		if (type & IORESOURCE_MEM)
> > +			return sys->win_align_mem;
> > +		if (type & IORESOURCE_IO)
> > +			return sys->win_align_io;
> > +	}
> > +	return 1;
> > +}
> > +
> 
> Unfortunately, this doesn't work as is for me: the if (!bus->parent)
> prevents the thing from being effective. Here my lspci output:
> 
> # /usr/sbin/lspci 
> 00:00.0 Host bridge: Marvell Technology Group Ltd. Device 102d
> 00:01.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> 00:02.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> 00:03.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> 00:04.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> 00:05.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> 00:06.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> 03:00.0 SCSI storage controller: Marvell Technology Group Ltd. 88SX7042 PCI-e 4-port SATA-II (rev 02)
> 05:00.0 Ethernet controller: Intel Corporation 82572EI Gigabit Ethernet Controller (Copper) (rev 06)
> 
> And the function pcibios_window_alignment() only gets called for bus 1,
> 2, 3, 4, 5, 6 and never for bus 0.
That's the exact reverse of what I'd expect: the child buses should
have a non-NULL parent pointer.  Hmm.  Try changing that for !bus->self -
that should make it effective only on the host bridge.
But... hang on...
/*
 * Returns true if the pci bus is root (behind host-pci bridge),
 * false otherwise
 */
static inline bool pci_is_root_bus(struct pci_bus *pbus)
{
        return !(pbus->parent);
}
So the original test _is_ correct, and should only be triggered for
the _root_ bus, that being bus 0 in the above case.
But... wait a moment, what are you saying?  Which bridges need this
fixup?  The Marvell PCI-to-PCI bridges or the host bridge?
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 14:50                 ` Russell King - ARM Linux
@ 2013-01-31 14:57                   ` Thomas Petazzoni
  2013-01-31 15:08                     ` Russell King - ARM Linux
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-31 14:57 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Russell King - ARM Linux,
On Thu, 31 Jan 2013 14:50:02 +0000, Russell King - ARM Linux wrote:
> > > +	/* Ignore downstream buses */
> > > +	if (!bus->parent) {
> > > +		if (type & IORESOURCE_MEM)
> > > +			return sys->win_align_mem;
> > > +		if (type & IORESOURCE_IO)
> > > +			return sys->win_align_io;
> > > +	}
> > > +	return 1;
> > > +}
> > > +
> > 
> > Unfortunately, this doesn't work as is for me: the if (!bus->parent)
> > prevents the thing from being effective. Here my lspci output:
> > 
> > # /usr/sbin/lspci 
> > 00:00.0 Host bridge: Marvell Technology Group Ltd. Device 102d
> > 00:01.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> > 00:02.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> > 00:03.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> > 00:04.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> > 00:05.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> > 00:06.0 PCI bridge: Marvell Technology Group Ltd. Device 1092
> > 03:00.0 SCSI storage controller: Marvell Technology Group Ltd. 88SX7042 PCI-e 4-port SATA-II (rev 02)
> > 05:00.0 Ethernet controller: Intel Corporation 82572EI Gigabit Ethernet Controller (Copper) (rev 06)
> > 
> > And the function pcibios_window_alignment() only gets called for bus 1,
> > 2, 3, 4, 5, 6 and never for bus 0.
> 
> That's the exact reverse of what I'd expect: the child buses should
> have a non-NULL parent pointer.
Indeed. But this function never gets called with bus->number == 0, only
with bus->number = 1, 2, 3, 4, 5, 6. So those are child busses, and
therefore they have a parent.
If I add a debug message in this pcibios_window_alignment() function
(which gets shown unconditionally, i.e the debug message is outside the
if condition we are discussing), then I get:
pcibios_window_alignment: called for bus ef371c00 (sysdata=ef2f6bc0), number 1
pcibios_window_alignment: called for bus ef371c00 (sysdata=ef2f6bc0), number 1
pcibios_window_alignment: called for bus ef371c00 (sysdata=ef2f6bc0), number 1
pcibios_window_alignment: called for bus ef371a00 (sysdata=ef2f6bc0), number 2
pcibios_window_alignment: called for bus ef371a00 (sysdata=ef2f6bc0), number 2
pcibios_window_alignment: called for bus ef371a00 (sysdata=ef2f6bc0), number 2
pcibios_window_alignment: called for bus ef371800 (sysdata=ef2f6bc0), number 3
pcibios_window_alignment: called for bus ef371800 (sysdata=ef2f6bc0), number 3
pcibios_window_alignment: called for bus ef371800 (sysdata=ef2f6bc0), number 3
pcibios_window_alignment: called for bus ef371600 (sysdata=ef2f6bc0), number 4
pcibios_window_alignment: called for bus ef371600 (sysdata=ef2f6bc0), number 4
pcibios_window_alignment: called for bus ef371600 (sysdata=ef2f6bc0), number 4
pcibios_window_alignment: called for bus ef371400 (sysdata=ef2f6bc0), number 5
pcibios_window_alignment: called for bus ef371400 (sysdata=ef2f6bc0), number 5
pcibios_window_alignment: called for bus ef371400 (sysdata=ef2f6bc0), number 5
pcibios_window_alignment: called for bus ef371200 (sysdata=ef2f6bc0), number 6
pcibios_window_alignment: called for bus ef371200 (sysdata=ef2f6bc0), number 6
pcibios_window_alignment: called for bus ef371200 (sysdata=ef2f6bc0), number 6
See, it is never called for bus number 0.
> Hmm.  Try changing that for !bus->self -
> that should make it effective only on the host bridge.
> 
> But... hang on...
> 
> /*
>  * Returns true if the pci bus is root (behind host-pci bridge),
>  * false otherwise
>  */
> static inline bool pci_is_root_bus(struct pci_bus *pbus)
> {
>         return !(pbus->parent);
> }
> 
> So the original test _is_ correct, and should only be triggered for
> the _root_ bus, that being bus 0 in the above case.
Except that this pcibios_window_alignment() function is apparently
never called for the root bus.
> 
> But... wait a moment, what are you saying?  Which bridges need this
> fixup?  The Marvell PCI-to-PCI bridges or the host bridge?
I am talking about the PCI-to-PCI bridges. I want the I/O windows
assigned to each PCI-to-PCI bridge to be 64K aligned. The PCI-to-PCI
bridges are devices that sit on bus 0, each giving access to the child
buses 1, 2, 3, 4, 5, 6.
I have the impression that the pcibios_window_alignment() function is
called on the *child* bus to know the requested alignments for the
bridge that sits on the parent bus and gives access to this child bus.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 14:57                   ` Thomas Petazzoni
@ 2013-01-31 15:08                     ` Russell King - ARM Linux
  2013-01-31 15:22                       ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-31 15:08 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 03:57:37PM +0100, Thomas Petazzoni wrote:
> Indeed. But this function never gets called with bus->number == 0, only
> with bus->number = 1, 2, 3, 4, 5, 6. So those are child busses, and
> therefore they have a parent.
Having thought about it, yes, that's what I expect, because root bus
resources are never resized.  Root bus resources are normally setup
before probing to indicate what size they _can_ be and define what
space is available to the downstream devices.
Child busses (behind a PCI-2-PCI bridge) are a different matter - these
will be adjusted according to their on-bus devices and the windows for
them sized and allocated appropriately _within_ the confines of the
root bus resource.
> I am talking about the PCI-to-PCI bridges. I want the I/O windows
> assigned to each PCI-to-PCI bridge to be 64K aligned. The PCI-to-PCI
> bridges are devices that sit on bus 0, each giving access to the child
> buses 1, 2, 3, 4, 5, 6.
Right, so you've just confirmed that this _is_ the right hook and it
_is_ being called at the right time.
However, I had interpreted your requirement as the _host_ bridge only
(insufficient information in your previous emails, or I missed it).
If that's what your bridge requires, then we need to detect it via
its vendor and device IDs and only apply this fixup to those bridges
which require a 64K alignment.
So, the IDs are vendor:device = 0x11ab:0x1092 ?  And let me get this
straight, it _is_ a specific requirement for this particular
P2P bridge?
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 15:08                     ` Russell King - ARM Linux
@ 2013-01-31 15:22                       ` Thomas Petazzoni
  2013-01-31 15:36                         ` Russell King - ARM Linux
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-31 15:22 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Russell King - ARM Linux,
On Thu, 31 Jan 2013 15:08:01 +0000, Russell King - ARM Linux wrote:
> On Thu, Jan 31, 2013 at 03:57:37PM +0100, Thomas Petazzoni wrote:
> > Indeed. But this function never gets called with bus->number == 0, only
> > with bus->number = 1, 2, 3, 4, 5, 6. So those are child busses, and
> > therefore they have a parent.
> 
> Having thought about it, yes, that's what I expect, because root bus
> resources are never resized.  Root bus resources are normally setup
> before probing to indicate what size they _can_ be and define what
> space is available to the downstream devices.
> 
> Child busses (behind a PCI-2-PCI bridge) are a different matter - these
> will be adjusted according to their on-bus devices and the windows for
> them sized and allocated appropriately _within_ the confines of the
> root bus resource.
Ok.
> > I am talking about the PCI-to-PCI bridges. I want the I/O windows
> > assigned to each PCI-to-PCI bridge to be 64K aligned. The PCI-to-PCI
> > bridges are devices that sit on bus 0, each giving access to the child
> > buses 1, 2, 3, 4, 5, 6.
> 
> Right, so you've just confirmed that this _is_ the right hook and it
> _is_ being called at the right time.
Ok.
> However, I had interpreted your requirement as the _host_ bridge only
> (insufficient information in your previous emails, or I missed it).
Might be insufficient information, or wrong terminology on my side.
> If that's what your bridge requires, then we need to detect it via
> its vendor and device IDs and only apply this fixup to those bridges
> which require a 64K alignment.
> 
> So, the IDs are vendor:device = 0x11ab:0x1092 ?  And let me get this
> straight, it _is_ a specific requirement for this particular bridge
> P2P bridge?
The vendor/device ID will change. This P2P bridge is emulated. However,
I'm not sure why you need to know the vendor:device ID to make the
fixup specific.
The fixup is already specific to those bridges, since I am just setting
pci_sys_data->win_align_io to 64K for the particular buses that are
downstream of the problematic bridges. So it doesn't affect any other bus
on the system, and therefore I don't think this fixup needs to be made
specific to a given vendor:device, no?
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 15:22                       ` Thomas Petazzoni
@ 2013-01-31 15:36                         ` Russell King - ARM Linux
  2013-01-31 15:47                           ` Thomas Petazzoni
  2013-01-31 16:18                           ` Arnd Bergmann
  0 siblings, 2 replies; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-31 15:36 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 04:22:37PM +0100, Thomas Petazzoni wrote:
> Dear Russell King - ARM Linux,
> 
> On Thu, 31 Jan 2013 15:08:01 +0000, Russell King - ARM Linux wrote:
> > If that's what your bridge requires, then we need to detect it via
> > its vendor and device IDs and only apply this fixup to those bridges
> > which require a 64K alignment.
> > 
> > So, the IDs are vendor:device = 0x11ab:0x1092 ?  And let me get this
> > straight, it _is_ a specific requirement for this particular bridge
> > P2P bridge?
> 
> The vendor/device ID will change. This P2P bridge is emulated. However,
> I'm not sure why you need to know the vendor:device ID to make the
> fixup specific.
> 
> The fixup is already specific to those bridges, since I am just setting
> pci_sys_data->win_align_io to 64K for the particular buses that are
> downstream the problematic bridges. So it doesn't affect any other bus
> on the system, and therefore I don't think this fixup needs to be made
> specific to a given vendor:device, no?
The pci_sys_data is not specific to one bus.  It's specific from the
root bus downwards, and is shared by all child busses.
The problem is if you have some card or a conventional P2P bridge which
has 4K windows.  If you merely set the alignment to 64K for all bridges,
then all bridges get this treatment whether or not they need it.  That's
what I'm trying to avoid.
Take, for instance, a cardbus bridge (remember, there are PCI cards which
can be plugged in to give you a cardbus slot.)  I have a device here which
can be plugged into a cardbus slot which has not just one P2P bridge but
two, and a bunch of downsteam devices, including VGA, ethernet, USB, PS/2
etc.  (Okay, Linux doesn't support this hardware because of crappy X86
stuff, despite the fact Windows copes with it just fine.)
There have been cards in the past which have had P2P bridges on them as
well.
So, simply believing that the only P2P bridges in the system will be
those on the physical board is a mistake.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 15:36                         ` Russell King - ARM Linux
@ 2013-01-31 15:47                           ` Thomas Petazzoni
  2013-01-31 15:48                             ` Russell King - ARM Linux
  2013-01-31 16:18                           ` Arnd Bergmann
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-31 15:47 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Russell King - ARM Linux,
On Thu, 31 Jan 2013 15:36:25 +0000, Russell King - ARM Linux wrote:
> > The fixup is already specific to those bridges, since I am just setting
> > pci_sys_data->win_align_io to 64K for the particular buses that are
> > downstream the problematic bridges. So it doesn't affect any other bus
> > on the system, and therefore I don't think this fixup needs to be made
> > specific to a given vendor:device, no?
> 
> The pci_sys_data is not specific to one bus.  It's specific from the
> root bus downwards, and is shared by all child busses.
Ah, ok, that's the part I was missing.
> The problem is if you have some card or a conventional P2P bridge which
> has 4K windows.  If you merely set the alignment to 64K for all bridges,
> then all bridges get this treatment whether or not they need it.  That's
> what I'm trying to avoid.
> 
> Take, for instance, a cardbus bridge (remember, there are PCI cards which
> can be plugged in to give you a cardbus slot.)  I have a device here which
> can be plugged into a cardbus slot which has not just one P2P bridge but
> two, and a bunch of downsteam devices, including VGA, ethernet, USB, PS/2
> etc.  (Okay, Linux doesn't support this hardware because of crappy X86
> stuff, despite the fact Windows cope with it just fine.)
> 
> There have been cards in the past which have had P2P bridges on them as
> well.
> 
> So, simply believing that the only P2P bridges in the system will be
> those on the physical board is a mistake.
Yes, indeed, I understand this. I just thought this pci_sys_data
structure was per-bus. Of course, if it's shared by all buses on the
system, we need a way to apply this fixup only to the Marvell bridges.
Should I just hard-code this special fixup in
pcibios_window_alignment() with a check on VID/PID ?
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 15:47                           ` Thomas Petazzoni
@ 2013-01-31 15:48                             ` Russell King - ARM Linux
  0 siblings, 0 replies; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-01-31 15:48 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 04:47:35PM +0100, Thomas Petazzoni wrote:
> Dear Russell King - ARM Linux,
> 
> On Thu, 31 Jan 2013 15:36:25 +0000, Russell King - ARM Linux wrote:
> 
> > > The fixup is already specific to those bridges, since I am just setting
> > > pci_sys_data->win_align_io to 64K for the particular buses that are
> > > downstream the problematic bridges. So it doesn't affect any other bus
> > > on the system, and therefore I don't think this fixup needs to be made
> > > specific to a given vendor:device, no?
> > 
> > The pci_sys_data is not specific to one bus.  It's specific from the
> > root bus downwards, and is shared by all child busses.
> 
> Ah, ok, that's the part I was missing.
> 
> > The problem is if you have some card or a conventional P2P bridge which
> > has 4K windows.  If you merely set the alignment to 64K for all bridges,
> > then all bridges get this treatment whether or not they need it.  That's
> > what I'm trying to avoid.
> > 
> > Take, for instance, a cardbus bridge (remember, there are PCI cards which
> > can be plugged in to give you a cardbus slot.)  I have a device here which
> > can be plugged into a cardbus slot which has not just one P2P bridge but
> > two, and a bunch of downsteam devices, including VGA, ethernet, USB, PS/2
> > etc.  (Okay, Linux doesn't support this hardware because of crappy X86
> > stuff, despite the fact Windows cope with it just fine.)
> > 
> > There have been cards in the past which have had P2P bridges on them as
> > well.
> > 
> > So, simply believing that the only P2P bridges in the system will be
> > those on the physical board is a mistake.
> 
> Yes, indeed, I understand this. I just thought this pci_sys_data
> structure was per-bus. Of course, if it's shared by all buses on the
> system, we need a way to apply this fixup only to the Marvell bridges.
> 
> Should I just hard-code this special fixup in
> pcibios_window_alignment() with a check on VID/PID ?
Yes please.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 15:36                         ` Russell King - ARM Linux
  2013-01-31 15:47                           ` Thomas Petazzoni
@ 2013-01-31 16:18                           ` Arnd Bergmann
  2013-01-31 18:02                             ` Jason Gunthorpe
  1 sibling, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-31 16:18 UTC (permalink / raw)
  To: linux-arm-kernel
On Thursday 31 January 2013 15:36:25 Russell King - ARM Linux wrote:
> The pci_sys_data is not specific to one bus.  It's specific from the
> root bus downwards, and is shared by all child busses.
> 
> The problem is if you have some card or a conventional P2P bridge which
> has 4K windows.  If you merely set the alignment to 64K for all bridges,
> then all bridges get this treatment whether or not they need it.  That's
> what I'm trying to avoid.
> 
> Take, for instance, a cardbus bridge (remember, there are PCI cards which
> can be plugged in to give you a cardbus slot.)  I have a device here which
> can be plugged into a cardbus slot which has not just one P2P bridge but
> two, and a bunch of downsteam devices, including VGA, ethernet, USB, PS/2
> etc.  (Okay, Linux doesn't support this hardware because of crappy X86
> stuff, despite the fact Windows cope with it just fine.)
> 
> There have been cards in the past which have had P2P bridges on them as
> well.
> 
> So, simply believing that the only P2P bridges in the system will be
> those on the physical board is a mistake.
I was going to write something similar. Actually I think it's worse because
the case of an extra P2P bridge is quite likely for devices that actually
use I/O space, given that the use of I/O space is deprecated on PCIe.
This also means that a lot of devices using I/O space are legacy crap
and have random bugs regarding PCI standard compliance. I would not
expect those devices in general to do the right thing when I/O ports
beyond 65535 are used, although a lot of them would work.
For all I could tell, the safest solution with the I/O space would
be to pretend we had a shared 64K I/O space for all of the PCIe
ports on Armada XP, and map a separate 64K window for each port
using a different io_offset for each one.
This way, you can have a device on the second PCIe port use e.g. I/O
port number 0x3f8 for a legacy UART on the bus, which gets translated
into the Linux-visible port number 0x103f8.
The currently used method to have io_offset=0 for all PCIe ports
and use separate I/O port ranges of 64K for each PCIe port probably
still works for most devices, except those where we hardcode a port
number in the Linux device driver, or where the high address bits
don't get decoded properly.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 16:18                           ` Arnd Bergmann
@ 2013-01-31 18:02                             ` Jason Gunthorpe
  2013-01-31 20:46                               ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-31 18:02 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 05:18:46PM +0100, Arnd Bergmann wrote:
> For all I could tell, the safest solution with the I/O space would
> be to pretend we had a shared 64K I/O space for all of the PCIe
> ports on Armada XP, and map a separate 64K window for each port
> using a different io_offset for each one.
> This way, you can have a device on the second PCIe port use e.g. I/O
> port number 0x3f8 for a legacy UART on the bus, which gets translated
> into the Linux-visible port number 0x103f8.
> 
> The currently used method to have io_offset=0 for all PCIe ports
> and use separate I/O port ranges of 64K for each PCIe port probably
> still works for most devices, except those where we hardcode a port
> number in the Linux device driver, or where the high address bits
> don't get decoded properly.
Thinking about this some more, which of these methods to choose is
going to be dictated by what the Marvell HW does.
Since the IO space in the TLP is a full 32 bits, it matters what 32
bit value the HW PCI core places in the IO Rd/Wr transaction. This
value *must* match the value given to the Linux PCI core for resource
allocation, because it must be correctly programmed by Linux into the
downstream BARs/bridge windows.
So there are probably two choices for what the HW does, given a MBUS
window of 0xDEAD0000 -> 0xDEADFFFF set for IO, a read from physical
address 0xDEAD0000 produces a IO Rd TLP with either '0x00000000' or
'0xDEAD0000' in the address field.
If it is 0xDEAD0000, then Thomas has to keep what he has now, you
can't mess with this address. Verify that the full 32 bit address
exactly matching the MBUS window address is written to the PCI-PCI
bridge IO base/limit registers.
If it is 0x00000000 then the mmap scheme I outlined before must be
used, and verify that only 0->0xFFFF is written to the PCI-PCI bridge
IO base/limit registers..
My guess is the Marvell PCI-E copies whatever address it is given into
the IO TLP, so it would be the 0xDEAD0000 behaviour, however I bet you
can use the MBUS window target address remapping feature to get the
0x00000000 behaviour as well (though there are a limited number of
remappable MBUS windows, so that is probably not a good idea)
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 18:02                             ` Jason Gunthorpe
@ 2013-01-31 20:46                               ` Arnd Bergmann
  2013-01-31 22:44                                 ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-01-31 20:46 UTC (permalink / raw)
  To: linux-arm-kernel
On Thursday 31 January 2013, Jason Gunthorpe wrote:
> Thinking about this some more, which of these methods to choose is
> going to be dictated by what the Marvell HW does.
> 
> Since the IO space in the TLP is a full 32 bits, it matters what 32
> bit value the HW PCI core places in the IO Rd/Wr transaction. This
> value must match the value given to the Linux PCI core for resource
> allocation, because it must be correctly programmed by Linux into the
> downstream BARs/bridge windows.
> 
> So there are probably two choices for what the HW does, given a MBUS
> window of 0xDEAD0000 -> 0xDEADFFFF set for IO, a read from physical
> address 0xDEAD0000 produces a IO Rd TLP with either '0x00000000' or
> '0xDEAD0000' in the address field.
> 
> If it is 0xDEAD0000, then Thomas has to keep what he has now, you
> can't mess with this address. Verify that the full 32 bit address
> exactly matching the MBUS window address is written to the PCI-PCI
> bridge IO base/limit registers.
If you do this, you break all sorts of expectations in the kernel and
I guess you'd have to set the io_offset value of that bus to 0x21530000
in order to make Linux I/O port 0 go to the first byte of the window
and come out as 0xDEAD0000 on the bus, but you still won't be able to
use legacy devices with hardcoded I/O port numbers.
> If it is 0x00000000 then the mmap scheme I outlined before must be
> used, and verify that only 0->0xFFFF is written to the PCI-PCI bridge
> IO base/limit registers..
For the primary bus, yes, but there are still two options for the
second one: you can either start at 0 again or you can continue
at 0x10000 as we do for mv78xx0 and kirkwood for instance. Both
approaches probably have their merit.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 20:46                               ` Arnd Bergmann
@ 2013-01-31 22:44                                 ` Jason Gunthorpe
  2013-02-01 11:30                                   ` Arnd Bergmann
  2013-02-06 16:51                                   ` Thomas Petazzoni
  0 siblings, 2 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-01-31 22:44 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 08:46:22PM +0000, Arnd Bergmann wrote:
> > If it is 0xDEAD0000, then Thomas has to keep what he has now, you
> > can't mess with this address. Verify that the full 32 bit address
> > exactly matching the MBUS window address is written to the PCI-PCI
> > bridge IO base/limit registers.
> 
> If you do this, you break all sorts of expectations in the kernel and
> I guess you'd have to set the io_offset value of that bus to 0x21530000
> in order to make Linux I/O port 0 go to the first byte of the window
> and come out as 0xDEAD0000 on the bus, but you still won't be able to
> use legacy devices with hardcoded I/O port numbers.
I'm not sure exactly how the PCI core handles this, but it does look
like pci_add_resource_offset via io_offset is the answer. I'm not sure
what goes in the struct resource passed to the PCI core - the *bus* IO
address range or the *kernel* IO address range..
> > If it is 0x00000000 then the mmap scheme I outlined before must be
> > used, and verify that only 0->0xFFFF is written to the PCI-PCI bridge
> > IO base/limit registers..
> 
> For the primary bus, yes, but there are still two options for the
> second one: you can either start at 0 again or you can continue
No, for *all* links. You use a mmap scheme with 4k granularity, I
explained in a past email, but to quickly review..
- Each link gets 64k of reserved physical address space for IO,
  this is just set aside, no MBUS windows are permanently assigned.
- Linux is told to use a 64k IO range with bus IO address 0->0xFFFF
- When the IO base/limit register in the link PCI-PCI bridge is programmed
  the driver gets a 4k aligned region somewhere from 0->0xFFFF and then:
    - Allocates a 64k MBUS window that translates physical address
      0xZZZZxxxx to IO bus address 0x0000xxxx (goes in the TLP) for
      that link
    - Uses pci_ioremap_io to map the fraction of the link's 64k MBUS window
      allocated to that bridge to the correct offset in the 
      PCI_IO_VIRT_BASE region
So you'd end up with a MMU mapping something like:
  PCI_IO_VIRT_BASE    MBUS_IO_PHYS_BASE
    0->4k          => 0      -> 4k             // 4k assigned to link0
    4k->8k         => 64k+4k -> 64k+8k         // 4k assigned to link1
    8k->24k        => 128k+8k -> 128k+24k      // 8k assigned to link2
Where the physical mbus window for each link starts on each 64k block.
Thomas: This solves the need to have alignment of the IO regions, and
gets rid of any trouble with 32 bit IO addresses, however you'll need
to allocate the remap capable mbus windows separately for use by IO
mappings..
Though, there is still a problem with the MMIO mbus window
alignment. mbus windows are aligned to a multiple of their size, PCI
MMIO bridge windows are always aligned to 1M...
> at 0x10000 as we do for mv78xx0 and kirkwood for instance. Both
> approaches probably have their merit.
Kirkwood uses the MBUS remapping registers to set the TLP address of
link 0 to start at 0 and of link 1 to start at 0x10000 - so it is
consistent with what you describe..
However, this is a suboptimal way to run the HW. It would be much
better to place each link in a separate PCI domain and have each link
start its bus IO address at 0, and assign the kernel IO address in
sequential 64k blocks as today.
Though, it is my hope that Thomas's driver will work on Kirkwood as
well...
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 22:44                                 ` Jason Gunthorpe
@ 2013-02-01 11:30                                   ` Arnd Bergmann
  2013-02-01 19:52                                     ` Jason Gunthorpe
  2013-02-06 16:51                                   ` Thomas Petazzoni
  1 sibling, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-01 11:30 UTC (permalink / raw)
  To: linux-arm-kernel
On Thursday 31 January 2013, Jason Gunthorpe wrote:
> On Thu, Jan 31, 2013 at 08:46:22PM +0000, Arnd Bergmann wrote:
> 
> > > If it is 0xDEAD0000, then Thomas has to keep what he has now, you
> > > can't mess with this address. Verify that the full 32 bit address
> > > exactly matching the MBUS window address is written to the PCI-PCI
> > > bridge IO base/limit registers.
> > 
> > If you do this, you break all sorts of expectations in the kernel and
> > I guess you'd have to set the io_offset value of that bus to 0x21530000
> > in order to make Linux I/O port 0 go to the first byte of the window
> > and come out as 0xDEAD0000 on the bus, but you still won't be able to
> > use legacy devices with hardcoded I/O port numbers.
> 
> I'm not sure exactly how the PCI core handles this, but it does look
> like pci_add_resource_offset via io_offset is the answer. I'm not sure
> what goes in the struct resource passed to the PCI core - the *bus* IO
> address range or the *kernel* IO address range..
IO Resources are always expressed in the kernel's view, so they are in
the range from 0 to IO_SPACE_LIMIT. The idea is that you can have multiple
buses that each have their own address space start at 0, but can put
them into the kernel address space at a different address.
Each device on any bus can still use I/O addresses starting at zero,
and you could have e.g. a VGA card on two buses each respond to I/O cycles
on port 0x3c0, but the PCI core will translate the resources to appear
in the kernel space at 0x103c0 for the second one.
> > > If it is 0x00000000 then the mmap scheme I outlined before must be
> > > used, and verify that only 0->0xFFFF is written to the PCI-PCI bridge
> > > IO base/limit registers..
> > 
> > For the primary bus, yes, but there are still two options for the
> > second one: you can either start at 0 again or you can continue
> 
> No, for *all* links. You use a mmap scheme with 4k granularity, I
> explained in a past email, but to quickly review..
> 
> - Each link gets 64k of reserved physical address space for IO,
>   this is just set aside, no MBUS windows are permantently assigned.
> - Linux is told to use a 64k IO range with bus IO address 0->0xFFFF
> - When the IO base/limit register in the link PCI-PCI bridge is programmed
>   the driver gets a 4k aligned region somewhere from 0->0xFFFF and then:
>     - Allocates a 64k MBUS window that translates physical address
>       0xZZZZxxxx to IO bus address 0x0000xxxx (goes in the TLP) for
>       that link
>     - Uses pci_ioremap_io to map the fraction of the link's 64k MBUS window
>       allocated to that bridge to the correct offset in the 
>       PCI_IO_VIRT_BASE region
We'd have to change pci_ioremap_io to allow mapping less than 64k, but
yes, that would work, too. I don't see an advantage to it though,
other than having io_offset always be zero.
> > at 0x10000 as we do for mv78xx0 and kirkwood for instance. Both
> > approaches probably have their merit.
> 
> Kirkwood uses the MBUS remapping registers to set the TLP address of
> link 0 to start at 0 and of link 1 to start at 0x10000 - so it is
> consistent with what you describe..
Right, so it also uses io_offset = 0 all the time, which means the
bus I/O port numbers are identical to the Linux I/O port numbers,
but they go beyond 64K on the bus on the second and later links.
> However, this is a suboptimal way to run the HW. It would be much
> better to place each link in a seperate PCI domain and have each link
> start its bus IO address at 0, and assign the kernel IO address in
> sequential 64k blocks as today.
I agree.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01 11:30                                   ` Arnd Bergmann
@ 2013-02-01 19:52                                     ` Jason Gunthorpe
  0 siblings, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-01 19:52 UTC (permalink / raw)
  To: linux-arm-kernel
On Fri, Feb 01, 2013 at 11:30:18AM +0000, Arnd Bergmann wrote:
> IO Resources are always expressed in the kernel's view, so they are
> in the range from 0 to IO_SPACE_LIMIT. The idea is that you can have
> multiple buses that each have their own address space start at 0,
> but can put them into the kernel address space at a different
> address.
Sure, I see that.. and that seems reasonable as long as any IO bus
address aliases are put in separate PCI domains. It would be wonky if
devices on different PCI bus numbers in a single PCI domain had
overlapping IO addresses.
> > No, for *all* links. You use a mmap scheme with 4k granularity, I
> > explained in a past email, but to quickly review..
> > 
> > - Each link gets 64k of reserved physical address space for IO,
> >   this is just set aside, no MBUS windows are permantently assigned.
> > - Linux is told to use a 64k IO range with bus IO address 0->0xFFFF
> > - When the IO base/limit register in the link PCI-PCI bridge is programmed
> >   the driver gets a 4k aligned region somewhere from 0->0xFFFF and then:
> >     - Allocates a 64k MBUS window that translates physical address
> >       0xZZZZxxxx to IO bus address 0x0000xxxx (goes in the TLP) for
> >       that link
> >     - Uses pci_ioremap_io to map the fraction of the link's 64k MBUS window
> >       allocated to that bridge to the correct offset in the 
> >       PCI_IO_VIRT_BASE region
> 
> We'd have to change pci_ioremap_io to allow mapping less than 64k, but
> yes, that would work, too. I don't see an advantage to it though,
> other than having io_offset always be zero.
Erm, that is the whole point. No PCI device in the system, on any of
the 10 links, would be required to use a 32 bit IO address. All are 16
bit and there is no compatibility problem on any links. You don't need
to declare any one link as being 'io supporting' or something like
that, it just works out of the box.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-31 22:44                                 ` Jason Gunthorpe
  2013-02-01 11:30                                   ` Arnd Bergmann
@ 2013-02-06 16:51                                   ` Thomas Petazzoni
  2013-02-06 17:09                                     ` Jason Gunthorpe
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-06 16:51 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
On Thu, 31 Jan 2013 15:44:59 -0700, Jason Gunthorpe wrote:
> > For the primary bus, yes, but there are still two options for the
> > second one: you can either start at 0 again or you can continue
> 
> No, for *all* links. You use a mmap scheme with 4k granularity, I
> explained in a past email, but to quickly review..
> 
> - Each link gets 64k of reserved physical address space for IO,
>   this is just set aside, no MBUS windows are permantently assigned.
> - Linux is told to use a 64k IO range with bus IO address 0->0xFFFF
> - When the IO base/limit register in the link PCI-PCI bridge is programmed
>   the driver gets a 4k aligned region somewhere from 0->0xFFFF and then:
>     - Allocates a 64k MBUS window that translates physical address
>       0xZZZZxxxx to IO bus address 0x0000xxxx (goes in the TLP) for
>       that link
>     - Uses pci_ioremap_io to map the fraction of the link's 64k MBUS window
>       allocated to that bridge to the correct offset in the 
>       PCI_IO_VIRT_BASE region
This, I think I now understand.
> So you'd end up with a MMU mapping something like:
>   PCI_IO_VIRT_BASE    MBUS_IO_PHYS_BASE
>     0->4k          => 0      -> 4k             // 4k assigned to link0
>     4k->8k         => 64k+4k -> 64k+8k         // 4k assigned to link1
>     8k->24k        => 128k+8k -> 128k+24k      // 8k assigned to link2
I am not sure I understand your example, starting at the second line.
Shouldn't the second line have been
      4k->8k         => 64k -> 64k+4k
 ?
If you do:
      4k->8k         => 64k+4k -> 64k+8k
as you suggested, then when the device driver will do an inl(0x4) on
this device, the device will receive the equivalent of an inl(0x1004),
no?
I understand that I have two choices here:
 * First one is to make the I/O regions of all PCIe links fit below the
   default IO_SPACE_LIMIT (0xffff) by doing the mapping trick you
   described above.
 * Second one is to have one 64 KB block for each PCIe link, which
   would require raising the IO_SPACE_LIMIT on this platform.
Is this correct?
If so, then what I don't understand is that Kirkwood does the second
thing (from arch/arm/mach-kirkwood/pcie.c) :
        switch (index) {
        case 0:
		[...]
		/* Here the code is mapping 0 -> 64k */
                pci_ioremap_io(SZ_64K * sys->busnr, KIRKWOOD_PCIE_IO_PHYS_BASE);
                break;
        case 1:
		[...]
		/* And here 64k -> 128k */
                pci_ioremap_io(SZ_64K * sys->busnr,
                               KIRKWOOD_PCIE1_IO_PHYS_BASE);
                break;
So it has PCI I/O space from 0 to 128k, but still it seems to use the
default IO_SPACE_LIMIT of 0xffff. How can this work? Maybe nobody ever
used a device on the second PCIe link that required I/O accesses.
> Where the physical mbus window for each link starts on each 64k block.
> 
> Thomas: This solves the need to have alignment of the IO regions, and
> gets rid of any trouble with 32 bit IO addreses, however you'll need
> to allocate the remap capable mbus windows separately for use by IO
> mappings..
> 
> Though, there is still a problem with the MMIO mbus window
> alignment. mbus windows are aligned to a multiple of their size, PCI
> MMIO bridge windows are always aligned to 1M...
Can't this be solved using the window_alignment() hook we've been
discussing separately? Just like we teach the Linux PCI core about our
alignment requirements of 64K for the I/O regions, we could teach it
about our alignment requirement on memory regions as well. No?
> Though, it is my hope that Thomas's driver will work on Kirkwood as
> well...
Yes, my plan is to have it working on Kirkwood. This WE, I was given a
Kirkwood based machine that has a usable PCIe device.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-06 16:51                                   ` Thomas Petazzoni
@ 2013-02-06 17:09                                     ` Jason Gunthorpe
  2013-02-06 17:18                                       ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-06 17:09 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Feb 06, 2013 at 05:51:28PM +0100, Thomas Petazzoni wrote:
> > So you'd end up with a MMU mapping something like:
> >   PCI_IO_VIRT_BASE    MBUS_IO_PHYS_BASE
> >     0->4k          => 0      -> 4k             // 4k assigned to link0
> >     4k->8k         => 64k+4k -> 64k+8k         // 4k assigned to link1
> >     8k->24k        => 128k+8k -> 128k+24k      // 8k assigned to link2
> 
> I am not sure to understand your example, starting at the second line.
> Shouldn't the second line have been
> 
>       4k->8k         => 64k -> 64k+4k
No..
 
> as you suggested, then when the device driver will do an inl(0x4) on
> this device, the device will receive the equivalent of an inl(0x1004),
> no?
Link 0 translates like:
- Linux driver does inl(0x4)
- ARM layer converts that into a read from PCI_IO_VIRT_BASE + 0x4
- The CPU TLB converts that into a read from CPU physical
  0xc0000000 + 0x4
- The MBUS window remap register converts that into a read from IO
  space 0x4
- The address 0x4 is placed in the PCI-E IO transaction of link 0
Link 1 translates like:
- Linux driver does inl(0x1004)
- ARM layer converts that into a read from PCI_IO_VIRT_BASE + 0x1004
- The CPU TLB converts that into a read from CPU physical
  0xc0000000 + 0x11004 (ie the mbus window for the link 1)
- The MBUS window remap register converts that into a read from IO
  space 0x1004
- The address 0x1004 is placed in the PCI-E IO transaction of link 1
Noting that in both instances the IO address passed to inl is what
eventually appears on the PCI-E link after all the translation is
completed.
The CPU MMU is being used to route 4k aligned ranges to the
correct link.
> I understand that I have two choices here:
> 
>  * First one is to make the I/O regions of all PCIe links fit below the
>    default IO_SPACE_LIMIT (0xffff) by doing the mapping trick you
>    described above.
> 
>  * Second one is to have one 64 KB block for each PCIe link, which
>    would require raising the IO_SPACE_LIMIT on this platform.
Yes, however, AFAIK this is the environment you should be running in:
#define IO_SPACE_LIMIT  ((resource_size_t)0xfffff)
Which is 5 f's not 4.
> > Though, there is still a problem with the MMIO mbus window
> > alignment. mbus windows are aligned to a multiple of their size, PCI
> > MMIO bridge windows are always aligned to 1M...
> 
> Can't this be solved using the window_alignement() hook we've been
> discussing separately? Just like we teach the Linux PCI core about our
> alignment requirements of 64K for the I/O regions, we could teach it
> about our alignment requirement on memory regions as well. No?
Hopefully :) As long as it can adjust the start and length you should
be fine.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-06 17:09                                     ` Jason Gunthorpe
@ 2013-02-06 17:18                                       ` Thomas Petazzoni
  2013-02-06 17:50                                         ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-06 17:18 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
On Wed, 6 Feb 2013 10:09:03 -0700, Jason Gunthorpe wrote:
> Link 0 translates like:
> 
> - Linux driver does inl(0x4)
> - ARM layer converts that into a read from PCI_IO_VIRT_BASE + 0x4
> - The CPU TLB converts that into a read from CPU physical
>   0xc0000000 + 0x4
> - The MBUS window remap register converts that into a read from IO
>   space 0x4
> - The address 0x4 is placed in the PCI-E IO transaction of link 0
> 
> Link 1 translates like:
> 
> - Linux driver does inl(0x1004)
> - ARM layer converts that into a read from PCI_IO_VIRT_BASE + 0x1004
> - The CPU TLB converts that into a read from CPU physical
>   0xc0000000 + 0x11004 (ie the mbus window for the link 1)
> - The MBUS window remap register converts that into a read from IO
>   space 0x1004
> - The address 0x1004 is placed in the PCI-E IO transaction of link 1
This last step is exactly what I thought would not work. If the PCIe
device has say 64 bytes of I/O space, then this 0x1004 PCI-E
transaction will be out of bounds, no?
Sorry, still learning how PCI works...
> > I understand that I have two choices here:
> > 
> >  * First one is to make the I/O regions of all PCIe links fit below the
> >    default IO_SPACE_LIMIT (0xffff) by doing the mapping trick you
> >    described above.
> > 
> >  * Second one is to have one 64 KB block for each PCIe link, which
> >    would require raising the IO_SPACE_LIMIT on this platform.
> 
> Yes, however, AFIAK this is the environment you should be running in:
> 
> #define IO_SPACE_LIMIT  ((resource_size_t)0xfffff)
> 
> Which is 5 f's not 4.
Aaah, you're right. My eyes got the number of f wrong. So I have 16
times 64 KB. So why would I bother doing the MMU trick if I can just
nicely remap 64 KB for each PCIe link ?
> > Can't this be solved using the window_alignement() hook we've been
> > discussing separately? Just like we teach the Linux PCI core about our
> > alignment requirements of 64K for the I/O regions, we could teach it
> > about our alignment requirement on memory regions as well. No?
> 
> Hopefully :) As long as it can adjust the start and length you should
> be fine.
Why would you need to adjust the length? If Linux allocates a 2 MB
resource on a 1 MB boundary, we simply increase the start address to
the next 2 MB boundary, and that's it. Why would the length need to
change?
Thanks,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-06 17:18                                       ` Thomas Petazzoni
@ 2013-02-06 17:50                                         ` Jason Gunthorpe
  2013-02-06 18:02                                           ` Thomas Petazzoni
                                                             ` (2 more replies)
  0 siblings, 3 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-06 17:50 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Feb 06, 2013 at 06:18:52PM +0100, Thomas Petazzoni wrote:
> Dear Jason Gunthorpe,
> 
> On Wed, 6 Feb 2013 10:09:03 -0700, Jason Gunthorpe wrote:
> 
> > Link 0 translates like:
> > 
> > - Linux driver does inl(0x4)
> > - ARM layer converts that into a read from PCI_IO_VIRT_BASE + 0x4
> > - The CPU TLB converts that into a read from CPU physical
> >   0xc0000000 + 0x4
> > - The MBUS window remap register converts that into a read from IO
> >   space 0x4
> > - The address 0x4 is placed in the PCI-E IO transaction of link 0
> > 
> > Link 1 translates like:
> > 
> > - Linux driver does inl(0x1004)
> > - ARM layer converts that into a read from PCI_IO_VIRT_BASE + 0x1004
> > - The CPU TLB converts that into a read from CPU physical
> >   0xc0000000 + 0x11004 (ie the mbus window for the link 1)
> > - The MBUS window remap register converts that into a read from IO
> >   space 0x1004
> > - The address 0x1004 is placed in the PCI-E IO transaction of link 1
> 
> This last step is exactly what I thought would not work. If the PCIe
> device has say 64 bytes of I/O space, then this 0x1004 PCI-E
> transaction will be out of bounds, no?
No.. PCI end devices are required to decode all 32 bits of address,
less the bits required for their allocation. So a device with 64 bytes
of IO will match bits 31:6 and then use bits 5:0 for the internal
register.
So a full 32 bit address is technically fine, according to the spec,
however:
 - The 32 bit decode is an optional feature on bridges
 - Some devices are broken because x86 only uses the low 64k.
So for best compatibility it is ideal to put everything in the low
64k.
However, IO space really should not be used by anything except very
legacy devices, so if the MMU business is a hassle for some reason I'd
just go with the 64k aligned method.
> > > Can't this be solved using the window_alignement() hook we've been
> > > discussing separately? Just like we teach the Linux PCI core about our
> > > alignment requirements of 64K for the I/O regions, we could teach it
> > > about our alignment requirement on memory regions as well. No?
> > 
> > Hopefully :) As long as it can adjust the start and length you should
> > be fine.
> 
> Why would you need to adjust the length? If Linux allocates a 2 MB
> resource on a 1 MB boundary, we simply increase the start address to
> the next 2 MB boundary, and that's it. Why would the length need to
> change?
Well, let's say 3MB is the example. A 3mb region needs to fit inside a
4mb MBUS window. If you align the start to 4mb then the pci-e core
needs to know that it can't use the extra 1mb covered by the mbus
window. mbus windows must not overlap.
Adjusting the bridge window length to be 4mb communicates that dead
space to the PCI core, and presumably this shows up in lspci and
whatnot.
I suppose if you align the end to 4mb (thus creating the gap before,
not after) things should work out OK, but the information that the gap
is routed to a specific PCI link is lost..
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-06 17:50                                         ` Jason Gunthorpe
@ 2013-02-06 18:02                                           ` Thomas Petazzoni
  2013-02-06 18:22                                           ` Stephen Warren
       [not found]                                           ` <20130207165009.73b1f340@skate>
  2 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-06 18:02 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
On Wed, 6 Feb 2013 10:50:19 -0700, Jason Gunthorpe wrote:
> No.. PCI end devices are required to decode all 32 bits of address,
> less the bits requires for their allocation. So a device with 64 bytes
> of IO will match bits 31:6 and then use bits 5:0 for the internal
> register.
> 
> So a full 32 bit address is technically fine, according to the spec,
> however:
>  - The 32 bit decode is an optional feature on bridges
>  - Some devices are broken because x86 only uses the low 64k.
Thanks again for the great explanation, it is now clear to me.
> So for best compatibility it is ideal to put everything in the low
> 64k.
> 
> However, IO space really should not be used by anything except very
> legacy devices, so if the MMU business is a hassle for some reason I'd
> just go with the 64k aligned method.
Me too :-)
It's what is done on Kirkwood since a long time, and apparently hasn't
caused any trouble so far.
> > Why would you need to adjust the length? If Linux allocates a 2 MB
> > resource on a 1 MB boundary, we simply increase the start address to
> > the next 2 MB boundary, and that's it. Why would the length need to
> > change?
> 
> Well, lets say 3MB is the example. A 3mb region needs to fit inside a
> 4mb MBUS window. If you align the start to 4mb then the pci-e core
> needs to know that it can't use the extra 1mb covered by the mbus
> window. mbus windows must not overlap.
Grumble, grumble, you're right. I now understand. Need to think of it,
because the current pcibios_window_alignment() thing only allows to
adjust the start address if I'm correct. But I'll have a more detailed
look. Or maybe Russell can comment on this specific topic?
Thanks!
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-06 17:50                                         ` Jason Gunthorpe
  2013-02-06 18:02                                           ` Thomas Petazzoni
@ 2013-02-06 18:22                                           ` Stephen Warren
  2013-02-06 18:39                                             ` Jason Gunthorpe
  2013-02-06 18:42                                             ` Thomas Petazzoni
       [not found]                                           ` <20130207165009.73b1f340@skate>
  2 siblings, 2 replies; 216+ messages in thread
From: Stephen Warren @ 2013-02-06 18:22 UTC (permalink / raw)
  To: linux-arm-kernel
On 02/06/2013 10:50 AM, Jason Gunthorpe wrote:
> On Wed, Feb 06, 2013 at 06:18:52PM +0100, Thomas Petazzoni wrote:
>> Dear Jason Gunthorpe,
>>
>> On Wed, 6 Feb 2013 10:09:03 -0700, Jason Gunthorpe wrote:
>>
>>> Link 0 translates like:
>>>
>>> - Linux driver does inl(0x4)
>>> - ARM layer converts that into a read from PCI_IO_VIRT_BASE + 0x4
>>> - The CPU TLB converts that into a read from CPU physical
>>>   0xc0000000 + 0x4
>>> - The MBUS window remap register converts that into a read from IO
>>>   space 0x4
>>> - The address 0x4 is placed in the PCI-E IO transaction of link 0
>>>
>>> Link 1 translates like:
>>>
>>> - Linux driver does inl(0x1004)
>>> - ARM layer converts that into a read from PCI_IO_VIRT_BASE + 0x1004
>>> - The CPU TLB converts that into a read from CPU physical
>>>   0xc0000000 + 0x11004 (ie the mbus window for the link 1)
>>> - The MBUS window remap register converts that into a read from IO
>>>   space 0x1004
>>> - The address 0x1004 is placed in the PCI-E IO transaction of link 1
>>
>> This last step is exactly what I thought would not work. If the PCIe
>> device has say 64 bytes of I/O space, then this 0x1004 PCI-E
>> transaction will be out of bounds, no?
> 
> No.. PCI end devices are required to decode all 32 bits of address,
> less the bits requires for their allocation. So a device with 64 bytes
> of IO will match bits 31:6 and then use bits 5:0 for the internal
> register.
Didn't Arnd say (earlier this thread) that PCI devices using IO BARs
were probably fairly legacy and hence might be buggy and might not obey
that rule? Now, I'd guess it's safe within the first 64k of IO space
though, so perhaps he was only talking about IO BAR bases >= 64k being
dubious? That would imply a device might only use bits 15:6 for matching
the BAR base and 5:0 for the internal register for a 64-byte BAR.
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-06 18:22                                           ` Stephen Warren
@ 2013-02-06 18:39                                             ` Jason Gunthorpe
  2013-02-06 18:42                                             ` Thomas Petazzoni
  1 sibling, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-06 18:39 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Feb 06, 2013 at 11:22:35AM -0700, Stephen Warren wrote:
> > No.. PCI end devices are required to decode all 32 bits of address,
> > less the bits requires for their allocation. So a device with 64 bytes
> > of IO will match bits 31:6 and then use bits 5:0 for the internal
> > register.
> 
> Didn't Arnd say (earlier this thread) that PCI devices using IO BARs
> were probably fairly legacy and hence might be buggy and might not obey
> that rule? Now, I'd guess it's safe within the first 64k of IO space
> though, so perhaps he was only talking about IO BAR bases >= 64k being
> dubious? That would imply a device might only use bits 15:6 for matching
> the BAR base and 5:0 for the internal register for a 64-byte BAR.
Right, that is what I was referring to when I said:
> - Some devices are broken because x86 only uses the low 64k.
Fortunately on PCI-E IO TLPs will be fully routed before they are sent
down a link, so downstream of a PCI-E link we will never see aliasing
of the low 16 bits.
This means if you do bridge to legacy PCI, and you do use devices that
don't decode the upper 16 bits that it will still work OK, because the
low 16 bits on the legacy PCI bus will still be unique in each device
on that bus.
That doesn't save you from weird legacy ISA stuff, or stuff that
doesn't respect the BARs, or other craziness..
My personal hope would be that nobody using a PCI-E ARM SOC ever has
to deal with anything to do with IO space ;)
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-06 18:22                                           ` Stephen Warren
  2013-02-06 18:39                                             ` Jason Gunthorpe
@ 2013-02-06 18:42                                             ` Thomas Petazzoni
  2013-02-06 22:04                                               ` Arnd Bergmann
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-06 18:42 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Stephen Warren,
On Wed, 06 Feb 2013 11:22:35 -0700, Stephen Warren wrote:
> > No.. PCI end devices are required to decode all 32 bits of address,
> > less the bits requires for their allocation. So a device with 64 bytes
> > of IO will match bits 31:6 and then use bits 5:0 for the internal
> > register.
> 
> Didn't Arnd say (earlier this thread) that PCI devices using IO BARs
> were probably fairly legacy and hence might be buggy and might not obey
> that rule? Now, I'd guess it's safe within the first 64k of IO space
> though, so perhaps he was only talking about IO BAR bases >= 64k being
> dubious? That would imply a device might only use bits 15:6 for matching
> the BAR base and 5:0 for the internal register for a 64-byte BAR.
The thing is that the existing PCIe support for earlier Marvell SoC
families already uses more than the first 64 KB to map the I/O BARs, and
this hasn't apparently caused any problems. We're talking about PCIe
support, not PCI, so I guess a lot of the very legacy devices are
simply not part of the equation.
Can't we simply agree on having a first implementation that does the
simple thing, like the existing PCIe implementation for earlier Marvell
SoC families, and improve that if it happens to be needed, depending on
user feedback?
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-06 18:42                                             ` Thomas Petazzoni
@ 2013-02-06 22:04                                               ` Arnd Bergmann
  0 siblings, 0 replies; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-06 22:04 UTC (permalink / raw)
  To: linux-arm-kernel
On Wednesday 06 February 2013, Thomas Petazzoni wrote:
> Can't we simply agree on having a first implementation that does the
> simple thing, like the existing PCIe implementation for earlier Marvell
> SoC families, and improve that if it happens to be needed, depending on
> user feedback?
Makes sense. I just looked up the kirkwood source to verify that the
window is set up to map PCI IO address 0x10000-0x1ffff for the second
bus to KIRKWOOD_PCIE1_IO_PHYS_BASE, which is mapped to logical port
number 0x10000-0x1ffff (identity mapping).
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
- [parent not found: <20130207165009.73b1f340@skate>] 
- * Giving special alignment/size constraints to the Linux PCI core?
       [not found]                                           ` <20130207165009.73b1f340@skate>
@ 2013-02-07 23:33                                             ` Arnd Bergmann
  2013-02-08  4:21                                             ` Bjorn Helgaas
  1 sibling, 0 replies; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-07 23:33 UTC (permalink / raw)
  To: linux-arm-kernel
On Thursday 07 February 2013, Thomas Petazzoni wrote:
> I am unfortunately starting to believe that using the standard PCI
> resource allocator is too complicated for our hardware, and that we
> should maybe have a dedicated allocator. But I would really like to
> avoid that if possible.
I see this just as more evidence that the emulated P2P bridge approach
is not the easiest solution and that it would be easier to go back
to adding the ports separately and make it possible to have every
port assign the BARs first and then set the resources based on
the physical address of the window we pick for it.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * Giving special alignment/size constraints to the Linux PCI core?
       [not found]                                           ` <20130207165009.73b1f340@skate>
  2013-02-07 23:33                                             ` Giving special alignment/size constraints to the Linux PCI core? Arnd Bergmann
@ 2013-02-08  4:21                                             ` Bjorn Helgaas
  2013-02-08  8:14                                               ` Thomas Petazzoni
  1 sibling, 1 reply; 216+ messages in thread
From: Bjorn Helgaas @ 2013-02-08  4:21 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 7, 2013 at 8:50 AM, Thomas Petazzoni
<thomas.petazzoni@free-electrons.com> wrote:
> Bjorn,
>
> I guess I need your advice on the below problem, but here is a summary.
>
> Basically, the PCI-to-PCI bridge specifications require that the memory
> address assigned to a PCI-to-PCI bridge is aligned on a 1 MB boundary.
>
> Unfortunately, on Marvell hardware, we can only create address decoding
> windows that are aligned on a multiple of their size (a 1 MB window
> must be 1 MB aligned, a 2 MB window must be 2 MB aligned, etc.).
>
> How can we teach the Linux PCI core about this requirement, so that it
> does a proper assignment of addresses at the PCI-to-PCI bridge level?
> For the I/O addresses, Russell suggested the pcibios_window_alignment()
> hook, but it doesn't receive the size of the resource, so we can't
> determine what alignment is needed.
>
> As Jason points out below, we need to be able to tell the PCI core that
> a given memory area needs some alignment, but also that its size is
> larger than what the PCIe device claims, because we cannot create
> address decoding windows of an arbitrary size. For example, an address
> decoding window of 3 MB is not possible, so if a device wants 3 MB,
> then we would need to extend this memory area to 4 MB so that the next
> device doesn't get an address decoding window that overlaps with the
> previous one.
>
> I was hoping that the emulated PCI-to-PCI bridge could, by its
> behavior, teach the Linux PCI core about these special constraints.
> However, reading the PCI-to-PCI bridge specification, I don't see how
> to achieve that.
>
> Do you have some suggestions?
Huh.  That hardware looks less and less like a P2P bridge all the time
:(  You can't configure it via standard PCI config accesses, and the
aperture alignment and size constraints sound completely non-standard.
 Are the specs for this thing public?
I could imagine changing pcibios_window_alignment() to take the
resource, so it could deal with the alignment question (though I
haven't looked in detail and there might be some implementation issue
with that).
With regard to the size issue (3MB window using 4MB of address space),
I can't think of a reasonable way to teach the PCI core about both
sizes.  But is there any reason to program the bridge for a 3MB window
instead of a 4MB window, given that there's nothing else we can do
with the extra 1MB anyway?  Is a 3MB window even possible?  I would
think something that must be aligned on its size would be restricted
to power-of-2 sizes anyway, just like PCI BARs are.  Maybe you can
just always round up window sizes to a power of 2?
Bjorn
> On Wed, 6 Feb 2013 10:50:19 -0700, Jason Gunthorpe wrote:
>
>> > > > Can't this be solved using the window_alignement() hook we've
>> > > > been discussing separately? Just like we teach the Linux PCI
>> > > > core about our alignment requirements of 64K for the I/O
>> > > > regions, we could teach it about our alignment requirement on
>> > > > memory regions as well. No?
>> > >
>> > > Hopefully :) As long as it can adjust the start and length you
>> > > should be fine.
>> >
>> > Why would you need to adjust the length? If Linux allocates a 2 MB
>> > resource on a 1 MB boundary, we simply increase the start address to
>> > the next 2 MB boundary, and that's it. Why would the length need to
>> > change?
>>
>> Well, lets say 3MB is the example. A 3mb region needs to fit inside a
>> 4mb MBUS window. If you align the start to 4mb then the pci-e core
>> needs to know that it can't use the extra 1mb covered by the mbus
>> window. mbus windows must not overlap.
>>
>> Adjusting the bridge window length to be 4mb communicates that dead
>> space to the PCI core, and presumably this shows up in lspci and
>> whatnot.
>>
>> I suppose if you align the end to 4mb (thus creating the gap before,
>> not after) things should work out OK, but the information that the gap
>> is routed to a specific PCI link is lost..
>>
>> Jason
>>
>> _______________________________________________
>> linux-arm-kernel mailing list
>> linux-arm-kernel at lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
>
>
> --
> Thomas Petazzoni, Free Electrons
> Kernel, drivers, real-time and embedded Linux
> development, consulting, training and support.
> http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-08  4:21                                             ` Bjorn Helgaas
@ 2013-02-08  8:14                                               ` Thomas Petazzoni
  2013-02-12 16:00                                                 ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-08  8:14 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Bjorn Helgaas,
On Thu, 7 Feb 2013 21:21:57 -0700, Bjorn Helgaas wrote:
> Huh.  That hardware looks less and less like a P2P bridge all the time
> :(  You can't configure it via standard PCI config accesses, and the
> aperture alignment and size constraints sound completely non-standard.
>  Are the specs for this thing public?
The specs for the Armada XP are not yet public, but the ones for earlier
Marvell SoC families are, and the PCIe stuff works basically the same.
The main difference between the Kirkwood family and Armada XP is that
Kirkwood had only 2 PCIe interfaces, so we could perfectly fine do a
static allocation of address decoding windows (see the four PCIe windows
in arch/arm/mach-kirkwood/addr-map.c:addr_map_info[]), while the Armada
XP has 10 PCIe interfaces, which makes the static allocation solution
unreasonable.
The list of publicly available specifications for Marvell EBU SoC is
available at Documentation/arm/Marvell/README. For Kirkwood, I would
recommend
http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf.
See chapter 2.3:
  The PCI Express address decoding scheme restricts the address window
  to a size of 2^n, and to a start address that is aligned to the window
  size.
> I could imagine changing pcibios_window_alignment() to take the
> resource, so it could deal with the alignment question (though I
> haven't looked in detail and there might be some implementation issue
> with that).
> 
> With regard to the size issue (3MB window using 4MB of address space),
> I can't think of a reasonable way to teach the PCI core about both
> sizes.  But is there any reason to program the bridge for a 3MB window
> instead of a 4MB window, given that there's nothing else we can do
> with the extra 1MB anyway?  Is a 3MB window even possible?  I would
> think something that must be aligned on its size would be restricted
> to power-of-2 sizes anyway, just like PCI BARs are.  Maybe you can
> just always round up window sizes to a power of 2?
The window sizes are power of two sizes. I didn't realize that it was
also the case for PCI BARs. Then there is no problem with the size I
guess, and only a problem of alignment. Having the possibility to
get the resource and return a fixed up start address would solve the
problem I'd say.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-08  8:14                                               ` Thomas Petazzoni
@ 2013-02-12 16:00                                                 ` Arnd Bergmann
  2013-02-12 18:41                                                   ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-12 16:00 UTC (permalink / raw)
  To: linux-arm-kernel
On Friday 08 February 2013, Thomas Petazzoni wrote:
> Dear Bjorn Helgaas,
> 
> On Thu, 7 Feb 2013 21:21:57 -0700, Bjorn Helgaas wrote:
> 
> > Huh.  That hardware looks less and less like a P2P bridge all the time
> > :(  You can't configure it via standard PCI config accesses, and the
> > aperture alignment and size constraints sound completely non-standard.
Right.
> > I could imagine changing pcibios_window_alignment() to take the
> > resource, so it could deal with the alignment question (though I
> > haven't looked in detail and there might be some implementation issue
> > with that).
> > 
> > With regard to the size issue (3MB window using 4MB of address space),
> > I can't think of a reasonable way to teach the PCI core about both
> > sizes.  But is there any reason to program the bridge for a 3MB window
> > instead of a 4MB window, given that there's nothing else we can do
> > with the extra 1MB anyway?  Is a 3MB window even possible?  I would
> > think something that must be aligned on its size would be restricted
> > to power-of-2 sizes anyway, just like PCI BARs are.  Maybe you can
> > just always round up window sizes to a power of 2?
> 
> The window sizes are power of two sizes. I didn't realize that it was
> also the case for PCI BARs. Then there is no problem with the size I
> guess, and only a problem of alignment. Having the possibility to
> get the resource and return a fixed up start address would solve the
> problem I'd say.
I thought that only device BARs in PCI had natural alignment, while
bridges don't.
I tried understanding the actual problem we have with the current
procedure, which on today's kirkwood is roughly implemented in ARM's
pci_common_init() as follows:
for_each_root_bus() {
	pci_scan_root_bus();
}
for_each_root_bus() {
	pci_bus_size_bridges();
	pci_bus_assign_resources();
	pci_enable_bridges();
	pci_bus_add_devices();
}
This is using hardcoded windows today, which are set up before calling
pci_scan_root_bus(). With your change, there is only one root bus,
and you intercept the pci_bus_assign_resources() stage in order to
set up the hardware specific window configuration for each PCIe
port of that root bus.
My feeling is that an easier solution would be to keep separate
root buses for each port, which then behaves completely PCIe
compliant, but add a hook in the procedure above to set up the
address translation windows between the pci_bus_size_bridges()
and the pci_bus_assign_resources() calls.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-12 16:00                                                 ` Arnd Bergmann
@ 2013-02-12 18:41                                                   ` Jason Gunthorpe
  2013-02-12 19:02                                                     ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-12 18:41 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Feb 12, 2013 at 04:00:08PM +0000, Arnd Bergmann wrote:
> > The window sizes are power of two sizes. I didn't realize that it was
> > also the case for PCI BARs. Then there is no problem with the size I
> > guess, and only a problem of alignment. Having the possibility to
> > get the resource and return a fixed up start address would solve the
> > problem I'd say.
> 
> I thought that only device BARs in PCI had natural alignment, while
> bridges don't.
Right
> My feeling is that an easier solution would be to keep separate
> root buses for each port, which then behaves completely PCIe
> compliant, but add a hook in the procedure above to set up the
> address translation windows between the pci_bus_size_bridges()
> and the pci_bus_assign_resources() calls.
This process is only done during driver initialization. How would you
support PCI-E device hotplug (my systems rely on this)? Hotplug works
today with the existing Marvell driver, however that relies on
pre-allocated windows.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-12 18:41                                                   ` Jason Gunthorpe
@ 2013-02-12 19:02                                                     ` Arnd Bergmann
  2013-02-12 19:38                                                       ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-12 19:02 UTC (permalink / raw)
  To: linux-arm-kernel
On Tuesday 12 February 2013, Jason Gunthorpe wrote:
> > My feeling is that an easier solution would be to keep separate
> > root buses for each port, which then behaves completely PCIe
> > compliant, but add a hook in the procedure above to set up the
> > address translation windows between the pci_bus_size_bridges()
> > and the pci_bus_assign_resources() calls.
> 
> This process is only done during driver initialization. How would you
> support PCI-E device hotplug (my systems rely on this)? Hotplug works
> today with the existing Marvell driver, however that relies on
> pre-allocated windows.
I did not expect hotplug to work with either approach. How does
it work with the existing driver? From my understanding, you still
assign all the top-level P2P bridge resources at bootup, and only
if that happens to have some space left before the next bridge,
it would be possible to fit in a hotplug device.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-12 19:02                                                     ` Arnd Bergmann
@ 2013-02-12 19:38                                                       ` Jason Gunthorpe
  2013-02-12 23:05                                                         ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-12 19:38 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Feb 12, 2013 at 07:02:14PM +0000, Arnd Bergmann wrote:
> On Tuesday 12 February 2013, Jason Gunthorpe wrote:
> > > My feeling is that an easier solution would be to keep separate
> > > root buses for each port, which then behaves completely PCIe
> > > compliant, but add a hook in the procedure above to set up the
> > > address translation windows between the pci_bus_size_bridges()
> > > and the pci_bus_assign_resources() calls.
> > 
> > This process is only done during driver initialization. How would you
> > support PCI-E device hotplug (my systems rely on this)? Hotplug works
> > today with the existing Marvell driver, however that relies on
> > pre-allocated windows.
> 
> I did not expect hotplug to work with either approach. How does
> it work with the existing driver? From my understanding, you still
> assign all the top-level P2P bridge resources at bootup, and only
> if that happens to have some space left before the next bridge,
> it would be possible to fit in a hotplug device.
PCI-E hotplug can be supported through the generic /sys/bus/pci/rescan
mechanism, which forces a rediscovery/re-evaluation of all the buses
in the system. The PCI core is smart enough to know what it can/can
not reassign and can move the bridge windows around (IIRC most of the
issues here are resolved these days?).
For PCI-E, the root port bridge can be placed anywhere in the host
aperture, so as long as the host aperture isn't filled the core can
allocate a memory region for the now active port.
Thomas's driver should support this as the regions and windows are all
properly dynamic.
This is one reason why re-using the PCI core code is so desirable, it
handles all these complexities.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-12 19:38                                                       ` Jason Gunthorpe
@ 2013-02-12 23:05                                                         ` Arnd Bergmann
  2013-02-13  0:32                                                           ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-12 23:05 UTC (permalink / raw)
  To: linux-arm-kernel
On Tuesday 12 February 2013, Jason Gunthorpe wrote:
> PCI-E hotplug can be supported through the generic /sys/bus/pci/rescan
> mechanism, which forces a rediscovery/re-evaluation of all the buses
> in the system. The PCI core is smart enough to know what it can/can
> not reassign and can move the bridge windows around (IIRC most of the
> issues here are resolved these days?).
> 
> For PCI-E, the root port bridge can be placed anywhere in the host
> aperture, so as long as the host aperture isn't filled the core can
> allocate a memory region for the now active port.
> 
> Thomas's driver should support this as the regions and windows are all
> properly dynamic.
Ah, so you only allow hotplugging into the root ports, but not behind
additional bridges that have active devices on them, right?
I guess that is a common limitation for PCIe hotplugging.
 
> This is one reason why re-using the PCI core code is so desirable, it
> handles all these complexities.
Ok, I'm starting to understand what the entire discussion is about ;)
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-12 23:05                                                         ` Arnd Bergmann
@ 2013-02-13  0:32                                                           ` Jason Gunthorpe
  2013-02-13 18:53                                                             ` Arnd Bergmann
  2013-02-13 21:02                                                             ` Yinghai Lu
  0 siblings, 2 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-13  0:32 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Feb 12, 2013 at 11:05:28PM +0000, Arnd Bergmann wrote:
> On Tuesday 12 February 2013, Jason Gunthorpe wrote:
> > PCI-E hotplug can be supported through the generic /sys/bus/pci/rescan
> > mechanism, which forces a rediscovery/re-evaluation of all the buses
> > in the system. The PCI core is smart enough to know what it can/can
> > not reassign and can move the bridge windows around (IIRC most of the
> > issues here are resolved these days?).
> > 
> > For PCI-E, the root port bridge can be placed anywhere in the host
> > aperture, so as long as the host aperture isn't filled the core can
> > allocate a memory region for the now active port.
> > 
> > Thomas's driver should support this as the regions and windows are all
> > properly dynamic.
> 
> Ah, so you only allow hotplugging into the root ports, but not behind
> additional bridges that have active devices on them, right?
 
> I guess that is a common limitation for PCIe hotplugging.
In all the cases I've worked with, it has been a root port hot plug,
but I could imagine something like ExpressCard requiring a second
bridge.
The standard answer is to leave appropriate gaps. My *guess* on this
matter is that on x86 the gaps are left, as appropriate, by the boot
firmware. Eg an ExpressCard slot will always have a window assigned to
its bridge and Linux would typically not reassign it (or similar).
PCI core support for firmware-less embedded will someday need to do
something similar, eg via a special DT attribute on hot plug capable
ports.
Just to circle back on this whole thread - Thomas's solution is pretty
good, it covers pretty much all the use cases. I think it is a good
place to start, and as the firmware-less 'drivers/pci/host' concept
develops the right support will eventually come, as everyone is now
aware of the need to control the host bridge aperture from the core
PCI code.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-13  0:32                                                           ` Jason Gunthorpe
@ 2013-02-13 18:53                                                             ` Arnd Bergmann
  2013-02-13 19:12                                                               ` Jason Gunthorpe
  2013-02-13 21:02                                                             ` Yinghai Lu
  1 sibling, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-13 18:53 UTC (permalink / raw)
  To: linux-arm-kernel
On Wednesday 13 February 2013, Jason Gunthorpe wrote:
> On Tue, Feb 12, 2013 at 11:05:28PM +0000, Arnd Bergmann wrote:
> > On Tuesday 12 February 2013, Jason Gunthorpe wrote:
> > > PCI-E hotplug can be supported through the generic /sys/bus/pci/rescan
> > > mechanism, which forces a rediscovery/re-evaluation of all the buses
> > > in the system. The PCI core is smart enough to know what it can/can
> > > not reassign and can move the bridge windows around (IIRC most of the
> > > issues here are resolved these days?).
> > > 
> > > For PCI-E, the root port bridge can be placed anywhere in the host
> > > aperture, so as long as the host aperture isn't filled the core can
> > > allocate a memory region for the now active port.
> > > 
> > > Thomas's driver should support this as the regions and windows are all
> > > properly dynamic.
> > 
> > Ah, so you only allow hotplugging into the root ports, but not behind
> > additional bridges that have active devices on them, right?
>  
> > I guess that is a common limitation for PCIe hotplugging.
> 
> In all the cases I've worked with, it has been a root port hot plug,
> but I could imagine something like ExpressCard requiring a second
> bridge.
Ok, I see.
> The standard answer is to leave appropriate gaps. My *guess* on this
> matter is that on x86 the gaps are left, as appropriate, by the boot
> firmware. Eg an ExpressCard slot will always have a window assigned to
> its bridge and Linux would typically not reassign it (or similar).
> 
> PCI core support for firmware-less embedded will someday need to do
> something similar, eg via a special DT attribute on hot plug capable
> ports.
I saw that the PCI core reserves 2MB memory space and 256 bytes of
I/O space per hotplug capable bridge by default, and you can override
these at boot time if you need more. I wonder if this means that
we end up using two of the precious address space windows for each
unused root port to already map these at boot time, and it certainly
works for most adapters, but this does not seem better than assigning
static windows of the same size at boot time for each port.
> Just to circle back on this whole thread - Thomas's solution is pretty
> good, it covers pretty much all the use cases. I think it is a good
> place to start, and as the firmware-less 'drivers/pci/host' concept
> develops the right support will eventually come, as everyone is now
> aware of the need to control the host bridge aperture from the core
> PCI code.
I agree the solution is not all that bad, I just want to be convinced
that it actually has advantages over the simpler approaches.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-13 18:53                                                             ` Arnd Bergmann
@ 2013-02-13 19:12                                                               ` Jason Gunthorpe
  2013-02-13 19:51                                                                 ` Thomas Petazzoni
  2013-02-13 21:10                                                                 ` Arnd Bergmann
  0 siblings, 2 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-13 19:12 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Feb 13, 2013 at 06:53:14PM +0000, Arnd Bergmann wrote:
> > The standard answer is to leave appropriate gaps. My *guess* on this
> > matter is that on x86 the gaps are left, as appropriate, by the boot
> > firmware. Eg an ExpressCard slot will always have a window assigned to
> > its bridge and Linux would typically not reassign it (or similar).
> > 
> > PCI core support for firmware-less embedded will someday need to do
> > something similar, eg via a special DT attribute on hot plug capable
> > ports.
> 
> I saw that the PCI core reserves 2MB memory space and 256 bytes of
> I/O space per hotplug capable bridge by default, and you can
> override
Haven't looked at how it determines what is hot plug
capable.. Technically every PCI-E port is hot plug capable, it really
depends on the specific board if a port can actually be hot plugged or
not - so maybe that is what gets set in DT?
> these at boot time if you need more. I wonder if this means that
> we end up using two of the precious address space windows for each
> unused root port to already map these at boot time, and it certainly
> works for most adapters, but this does not seem better than assigning
> static windows of the same size at boot time for each port.
If the PCI core programs the decoder on the bridge, then it will
consume a window - however if there is nothing behind the bridge then
leaving the bridge window disabled, but reserving the memory region is
a sensible thing to do.
I'm not sure what the state of the PCI core is today on this point,
but it could be altered..
Also the host driver can check the link status before consuming a
window, no link = no window.
Thomas, what were your test results on your 10 slot system? Did all 10
P2P bridges appear in lspci? Was there any address space reservation
for hot plug?
> > Just to circle back on this whole thread - Thomas's solution is pretty
> > good, it covers pretty much all the use cases. I think it is a good
> > place to start, and as the firmware-less 'drivers/pci/host' concept
> > develops the right support will eventually come, as everyone is now
> > aware of the need to control the host bridge aperture from the core
> > PCI code.
> 
> I agree the solution is not all that bad, I just want to be convinced
> that it actually has advantages over the simpler approaches.
Unfortunately my Marvell systems do not have oversubscribed mbus
windows, so I can't really comment on this :( However I do use the
hotplug capability in the current driver, so at least for me, it is
important to not lose that when trying to solve the oversubscription.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-13 19:12                                                               ` Jason Gunthorpe
@ 2013-02-13 19:51                                                                 ` Thomas Petazzoni
  2013-02-13 21:10                                                                 ` Arnd Bergmann
  1 sibling, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-13 19:51 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
On Wed, 13 Feb 2013 12:12:04 -0700, Jason Gunthorpe wrote:
> Thomas, what were your test results on your 10 slot system? Did all 10
> P2P bridges appear in lspci? Was there any address space reservation
> for hot plug?
My test system "only" has 6 PCIe slots, so I couldn't test with the
entire 10 possible PCIe interfaces. But 6 PCIe slots is still a good
number :-)
If you look at the cover letter of the PATCHv3, you'll see the output
of lspci -vvv. It shows that I create one PCI-to-PCI bridge for each
PCIe interface, regardless of whether the link is here or not. And for
those where there is nothing behind the bridge, no address space
reservation occurs, so I am not allocating address decoding windows for
those unused PCIe interfaces.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-13 19:12                                                               ` Jason Gunthorpe
  2013-02-13 19:51                                                                 ` Thomas Petazzoni
@ 2013-02-13 21:10                                                                 ` Arnd Bergmann
  2013-02-13 21:20                                                                   ` Yinghai Lu
  1 sibling, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-13 21:10 UTC (permalink / raw)
  To: linux-arm-kernel
On Wednesday 13 February 2013, Jason Gunthorpe wrote:
> On Wed, Feb 13, 2013 at 06:53:14PM +0000, Arnd Bergmann wrote:
> 
> > > The standard answer is to leave appropriate gaps. My *guess* on this
> > > matter is that on x86 the gaps are left, as appropriate, by the boot
> > > firmware. Eg an ExpressCard slot will always have a window assigned to
> > > its bridge and Linux would typically not reassign it (or similar).
> > > 
> > > PCI core support for firmware-less embedded will someday need to do
> > > something similar, eg via a special DT attribute on hot plug capable
> > > ports.
> > 
> > I saw that the PCI core reserves 2MB memory space and 256 bytes of
> > I/O space per hotplug capable bridge by default, and you can
> > override
> 
> Haven't looked at how it determines what is hot plug
> capable.. Technically every PCI-E port is hot plug capable, it really
> depends on the specific board if a port can actually be hot plugged or
> not - so maybe that is what gets set in DT?
The "is_hotplug_bridge" flag that determines this gets set for PCIe
bridges with the PCI_EXP_SLTCAP_HPC (hot plug capable) bit set in the
PCI_EXP_SLTCAP word.
> > these at boot time if you need more. I wonder if this means that
> > we end up using two of the precious address space windows for each
> > unused root port to already map these at boot time, and it certainly
> > works for most adapters, but this does not seem better than assigning
> > static windows of the same size at boot time for each port.
> 
> If the PCI core programs the decoder on the bridge, then it will
> consume a window - however if there is nothing behind the bridge then
> leaving the brdige window disabled, but reserving the memory region is
> a sensible thing to do.
>
> I'm not sure what the state of the PCI core is today on this point,
> but it could be altered..
The problem I see with the current implementation is that it reserves
a fixed size window and does not reassign the window of the bridge
itself, only the devices below it, at least if I am reading the
code correctly. I have not tried this myself.
 
> Also the host driver can check the link status before consuming a
> window, no link = no window.
Right, that works. Even if the link is up, it might require only
I/O or memory windows, rather than always using both. 
> > > Just to circle back on this whole thread - Thomas's solution is pretty
> > > good, it covers pretty much all the use cases. I think it is a good
> > > place to start, and as the firmware-less 'drivers/pci/host' concept
> > > develops the right support will eventually come, as everyone is now
> > > aware of the need to control the host bridge aperture from the core
> > > PCI code.
> > 
> > I agree the solution is not all that bad, I just want to be convinced
> > that it actually has advantages over the simpler approaches.
> 
> Unfortunatelly my Marvell systems do not have oversubscribed mbus
> windows, so I can't really comment on this :( However I do use the
> hotplug capability in the current driver, so at least for me, it is
> important to not loose that when trying to solve the oversubcription.
One thing worth trying is probably to hack the driver to only use
a couple of the available windows and see what happens when you hotplug
one card into all the slots one at a time.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-13 21:10                                                                 ` Arnd Bergmann
@ 2013-02-13 21:20                                                                   ` Yinghai Lu
  2013-02-13 22:24                                                                     ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Yinghai Lu @ 2013-02-13 21:20 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Feb 13, 2013 at 1:10 PM, Arnd Bergmann <arnd@arndb.de> wrote:
>
> The problem I see with the current implementation is that it reserves
> a fixed size window and does not reassign the window of the bridge
> itself, only the devices below it, at least if I am reading the
> code correctly. I have not tried this myself.
Which file?
Current code we *do* change the bridge itself resource (mmio, pref mmio, and io)
in
pciehp_configure_device/pci_assign_unassigned_bridge_resources
Let me know if it does not work for you.
Thanks
Yinghai
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-13 21:20                                                                   ` Yinghai Lu
@ 2013-02-13 22:24                                                                     ` Arnd Bergmann
  0 siblings, 0 replies; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-13 22:24 UTC (permalink / raw)
  To: linux-arm-kernel
On Wednesday 13 February 2013, Yinghai Lu wrote:
> On Wed, Feb 13, 2013 at 1:10 PM, Arnd Bergmann <arnd@arndb.de> wrote:
> >
> > The problem I see with the current implementation is that it reserves
> > a fixed size window and does not reassign the window of the bridge
> > itself, only the devices below it, at least if I am reading the
> > code correctly. I have not tried this myself.
> 
> Which file?
> 
> Current code we do change the bridge itself resource (mmio, pref mmio, and io)
> in
> pciehp_configure_device/pci_assign_unassigned_bridge_resources
> 
> Let met if it does not work you.
That is the code I was looking at, but I probably misunderstood something
there. I did not actually run it, just attempted to understand what it
does by inspection.
I found the code now, sorry for the confusion on my end.
Jason, Thomas: you win :-)
The concept that root ports don't get resized is hardwired in a lot of places
along the way. That could be changed, but there is a significant risk
of regressions if we try that. Adding fake bridges to work around that
isn't the nicest solution, but the code is there and works without
being able to break something else, so let's do that unless there are
new problems that make it harder.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
- * Giving special alignment/size constraints to the Linux PCI core?
  2013-02-13  0:32                                                           ` Jason Gunthorpe
  2013-02-13 18:53                                                             ` Arnd Bergmann
@ 2013-02-13 21:02                                                             ` Yinghai Lu
  1 sibling, 0 replies; 216+ messages in thread
From: Yinghai Lu @ 2013-02-13 21:02 UTC (permalink / raw)
  To: linux-arm-kernel
On Tue, Feb 12, 2013 at 4:32 PM, Jason Gunthorpe
<jgunthorpe@obsidianresearch.com> wrote:
> On Tue, Feb 12, 2013 at 11:05:28PM +0000, Arnd Bergmann wrote:
>>
>> Ah, so you only allow hotplugging into the root ports, but not behind
>> additional bridges that have active devices on them, right?
>
>> I guess that is a common limitation for PCIe hotplugging.
>
> In all the cases I've worked with, it has been a root port hot plug,
> but I could imagine something like ExpressCard requiring a second
> bridge.
>
> The standard answer is to leave appropriate gaps. My *guess* on this
> matter is that on x86 the gaps are left, as appropriate, by the boot
> firmware. Eg an ExpressCard slot will always have a window assigned to
> its bridge and Linux would typically not reassign it (or similar).
Yes, x86 BIOS will leave some pad number for every pcie root port.
Also with patches in
git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
 for-pci-busn-alloc
could realloc bus range to get big bus number.
Thanks
Yinghai
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-30 15:19         ` Russell King - ARM Linux
  2013-01-30 15:36           ` Thomas Petazzoni
@ 2013-01-31  7:10           ` Thierry Reding
  1 sibling, 0 replies; 216+ messages in thread
From: Thierry Reding @ 2013-01-31  7:10 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Jan 30, 2013 at 03:19:34PM +0000, Russell King - ARM Linux wrote:
> On Wed, Jan 30, 2013 at 03:08:56PM +0000, Russell King - ARM Linux wrote:
> > On Wed, Jan 30, 2013 at 01:03:44PM +0100, Thierry Reding wrote:
> > > On Wed, Jan 30, 2013 at 11:32:46AM +0000, Russell King - ARM Linux wrote:
> > > > On Mon, Jan 28, 2013 at 07:56:28PM +0100, Thomas Petazzoni wrote:
> > > > > +static resource_size_t mvebu_pcie_align_resource(struct pci_dev *dev,
> > > > > +						 const struct resource *res,
> > > > > +						 resource_size_t start,
> > > > > +						 resource_size_t size,
> > > > > +						 resource_size_t align)
> > > > > +{
> > > > > +	if (!(res->flags & IORESOURCE_IO))
> > > > > +		return start;
> > > > > +
> > > > > +	/*
> > > > > +	 * The I/O regions must be 64K aligned, because the
> > > > > +	 * granularity of PCIe I/O address decoding windows is 64 K
> > > > > +	 */
> > > > > +	return round_up(start, SZ_64K);
> > > > > +}
> > > > 
> > > > You do realise that this will result in all PCI I/O BARs being rounded
> > > > up to 64K.
> > > > 
> > > > I've just been digging through the PCI code and have come across a
> > > > function - pcibios_window_alignment() - which the PCI code allows to be
> > > > overriden which allows you to increase the alignment requirement of
> > > > bridge windows.  It takes the PCI bus and window type as arguments.
> > > > 
> > > > I'd suggest using that, and checking whether the bus which is passed
> > > > corresponds with a bus which gives you problems, so that you don't
> > > > impose the 64K requirement on downstream bridges.
> > > 
> > > That approach isn't going to work very well with multi-platform, though,
> > > since the function can only be overridden on a per-architecture basis.
> > 
> > The same can be said of all the various other functions which the PCI
> > stuff expects the arch to provide, yet we seem to cope just fine...
> 
> And this (untested) is how it's done:
> 
>  arch/arm/include/asm/mach/pci.h |    1 +
>  arch/arm/kernel/bios32.c        |    8 ++++++++
>  2 files changed, 9 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/arm/include/asm/mach/pci.h b/arch/arm/include/asm/mach/pci.h
> index db9fedb..bba0cf3 100644
> --- a/arch/arm/include/asm/mach/pci.h
> +++ b/arch/arm/include/asm/mach/pci.h
> @@ -29,6 +29,7 @@ struct hw_pci {
>  	void		(*postinit)(void);
>  	u8		(*swizzle)(struct pci_dev *dev, u8 *pin);
>  	int		(*map_irq)(const struct pci_dev *dev, u8 slot, u8 pin);
> +	resource_size_t	(*window_align)(struct pci_bus *, unsigned long);
>  };
>  
>  /*
> diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
> index 379cf32..32c3bd9 100644
> --- a/arch/arm/kernel/bios32.c
> +++ b/arch/arm/kernel/bios32.c
> @@ -581,6 +581,14 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res,
>  	return start;
>  }
>  
> +resource_size_t pcibios_window_alignment(struct pci_bus *bus,
> +					 unsigned long type)
> +{
> +	struct pci_sys_data *sys = bus->sysdata;
> +
> +	return sys->window_alignment ? sys->window_alignment(bus, type) : 1;
> +}
> +
>  /**
>   * pcibios_enable_device - Enable I/O and memory.
>   * @dev: PCI device to be enabled
Yes, something like that'll work. I had been under the impression that
what you proposed was overriding pcibios_window_alignment() for Marvell
only.
Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 836 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20130131/88de3d77/attachment.sig>
^ permalink raw reply	[flat|nested] 216+ messages in thread
 
 
 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-01-28 18:56 ` [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems Thomas Petazzoni
                     ` (3 preceding siblings ...)
  2013-01-30 11:32   ` Russell King - ARM Linux
@ 2013-02-01  0:34   ` Stephen Warren
  2013-02-01  1:41     ` Jason Gunthorpe
  4 siblings, 1 reply; 216+ messages in thread
From: Stephen Warren @ 2013-02-01  0:34 UTC (permalink / raw)
  To: linux-arm-kernel
On 01/28/2013 11:56 AM, Thomas Petazzoni wrote:
> This driver implements the support for the PCIe interfaces on the
> Marvell Armada 370/XP ARM SoCs. In the future, it might be extended to
> cover earlier families of Marvell SoCs, such as Dove, Orion and
> Kirkwood.
Bjorn and I happen to live very close, so we got together today and
talked about PCIe on ARM.
One of the questions he asked is: why does the window management on the
Marvell SoCs need to be dynamic?
(Sorry if this was covered earlier; I vaguely recall some discussion on
the topic, but couldn't find it quickly)
As background, PCIe enumeration in Linux usually works like:
1) You start off with some CPU physical address regions that generate
transactions on the PCIe bus.
2) You enumerate all the PCIe devices, and assign an address to each BAR
found, carved out of the PCIe address range corresponding to the regions
you knew from (1).
However, it sounds like the Marvell code wants to:
1) Start off with no real knowledge of the CPU physical address that
will generate transactions on the PCIe bus, since you want to assign
that later.
2) You enumerate all the PCIe devices, and assign an address. But, what
address range do you use?
3) Then you program the SoC's windows to set up the CPU->PCIe address
translations.
Am I recalling what you're trying to do correctly, or am I completely
confused?
Now, I recall that a related issue was that you are tight on CPU
physical address space, and the second algorithm above would allow the
size of the PCIe controller's window configuration to be as small as
possible, and hence there would be more CPU physical address space
available to fit in other peripherals.
However, why does this need to be dynamic? On a particular board, you
know all the other (non-PCIe) peripherals that you need to fit into the
CPU physical address space, so you know how much is left over for PCIe,
so why not always make the PCIe window fill up all the available space,
and use the first algorithm I described above? And also, I think you
always know the exact set of PCIe devices that are attached to the
boards, so you know the exact BAR size requirements there (or are there
user-accessible PCIe devices; I don't think so from your recent comments
about PCIe<->PCIe bridges not needing to be supported since the user
couldn't plug one in?)
Note that with DT, you can easily specify the window location/size in
the board .dts file rather than the SoC .dtsi file, so it can easily be
customized based on how much physical address space is taken up by RAM,
directly mapped NOR flash, etc.
With a static window configuration in DT, you'd end up with a system
that worked much like any x86 system or Tegra, with some static memory
range available to PCIe. It's just that in your case, the region
location/size could change from boot to boot based on DT, whereas it's
hard-coded in HW for Tegra and I assume x86 too.
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01  0:34   ` Stephen Warren
@ 2013-02-01  1:41     ` Jason Gunthorpe
  2013-02-01  2:21       ` Stephen Warren
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-01  1:41 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 05:34:36PM -0700, Stephen Warren wrote:
> On 01/28/2013 11:56 AM, Thomas Petazzoni wrote:
> > This driver implements the support for the PCIe interfaces on the
> > Marvell Armada 370/XP ARM SoCs. In the future, it might be extended to
> > cover earlier families of Marvell SoCs, such as Dove, Orion and
> > Kirkwood.
> 
> Bjorn and I happen to live very close, so we got together today and
> talked about PCIe on ARM.
> 
> One of the questions he asked is: why does the window management on the
> Marvell SoCs need to be dynamic?
> (Sorry if this was covered earlier; I vaguely recall some discussion on
> the topic, but couldn't find it quickly)
Well I've answered it several times, so has Thomas.. Let's try again,
please save for future reference :)
Let's separate two things.
The CPU physical address ranges reserved for PCI bus access are not
dynamic. This is set in DT, or whatever, statically. Just like every
other PCI case on ARM. Just like Tegra. That is not the issue.
What is required is that the division of this space amongst the 10
physical PCI-E links must be dynamic. Just like x86. Just like the new
tegra driver. [1]
Is that clear?
> As background, PCIe enumeration in Linux usually works like:
> 
> 1) You start off with some CPU physical address regions that generate
> transactions on the PCIe bus.
> 
> 2) You enumerate all the PCIe devices, and assign an address to each BAR
> found, carved out of the PCIe address range corresponding to the regions
> you knew from (1).
Step 2 also includes 'assign address windows to all the physical PCI-E
links'. This is very important because it is what this entire
discussion is about.
Look at how tegra or x86 works, the CPU physical addresses for PCI-E
do nothing until the PCI-to-PCI bridge window registers in each link's
configuration space are setup. Until that is done the SOC doesn't know
which link to send the transaction to.
Marvell is the same, until the link's window registers are setup the
CPU addresses don't go anywhere.
Notice this has absolutely no effect on the host bridge aperture.
This is a link-by-link configuration of what addresses *go down that
link*.
The big difference is the link window registers for Marvell do not
conform to the PCI configuration space specification. They are Marvell
specific.
This is what the glue code in the host driver does, it converts the
Marvell specificness into something the kernel can understand and
control. There are countless ways to do this, but please accept it
is necessary that it be done...
> Now, I recall that a related issue was that you are tight on CPU
> physical address space, and the second algorithm above would allow the
> size of the PCIe controller's window configuration to be as small as
> possible, and hence there would be more CPU physical address space
> available to fit in other peripherals.
Physical address space is certainly a concern, but availability of
decoder windows is the major one. Each link requires one decoder
window for MMIO and one for IO, and possibly one for prefetch. The
chip doesn't have 30 decoder windows. So the allocation of decoders to
links must be dynamic, based on the requirements of the downstream
endports on the link.
> However, why does this need to be dynamic? On a particular board, you
> know all the other (non-PCIe) peripherals that you need to fit into the
> CPU physical address space, so you know how much is left over for PCIe,
> so why not always make the PCIe window fill up all the available
> space,
Because there is no such thing as an all-links PCIe window on this
hardware.
Each link has a separate window.
If you get rid of all the dynamic allocation then every link must
statically reserve some portion of physical address space and some
number of decoder windows.
That more or less means you need to know what is going to be on the
other side of every link when you write the DT.
> With a static window configuration in DT, you'd end up with a system
> that worked much like any x86 system or Tegra, with some static memory
> range available to PCIe. It's just that in your case, the region
> location/size could change from boot to boot based on DT, whereas it's
> hard-coded in HW for Tegra and I assume x86 too.
How is this better? Now you have a system where you have to customize
the DT before you connect a PCI-E device. What if someone uses this
chip in a configuration with physical slots? How does that work? What
about hotplug? What about a unified kernel? That is *not* like x86 or
tegra.
IMHO Thomas's direction in his proposed driver ends up working very
close to the new tegra driver, and has the sort of dynamic allocation
and discovery people expect from PCI-E.
Jason
1 - The new tegra driver switches from calling ARM's pci_common_init
    once for every physical link, to once for the SOC. It does this by
    fixing the routing of config transactions so that the kernel sees
    the per-link PCI-PCI root port bridge config space provided by the
    hardware at the correct place. By doing this it changes from
    statically allocating a physical memory region for each link to
    statically allocating a region for all the links, and dynamically
    dividing that region amongst the links.
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01  1:41     ` Jason Gunthorpe
@ 2013-02-01  2:21       ` Stephen Warren
  2013-02-01  3:51         ` Jason Gunthorpe
  2013-02-01  8:46         ` Thomas Petazzoni
  0 siblings, 2 replies; 216+ messages in thread
From: Stephen Warren @ 2013-02-01  2:21 UTC (permalink / raw)
  To: linux-arm-kernel
On 01/31/2013 06:41 PM, Jason Gunthorpe wrote:
> On Thu, Jan 31, 2013 at 05:34:36PM -0700, Stephen Warren wrote:
>> On 01/28/2013 11:56 AM, Thomas Petazzoni wrote:
>>> This driver implements the support for the PCIe interfaces on the
>>> Marvell Armada 370/XP ARM SoCs. In the future, it might be extended to
>>> cover earlier families of Marvell SoCs, such as Dove, Orion and
>>> Kirkwood.
>>
>> Bjorn and I happen to live very close, so we got together today and
>> talked about PCIe on ARM.
>>
>> One of the questions he asked is: why does the window management on the
>> Marvell SoCs need to be dynamic?
> 
>> (Sorry if this was covered earlier; I vaguely recall some discussion on
>> the topic, but couldn't find it quickly)
> 
> Well I've answered it several times, so has Thomas.. Lets try again,
> please save for future reference :)
> 
> Lets seperate two things.
> 
> The CPU physical address ranges reserved for PCI bus access are not
> dynamic. This is set in DT, or whatever, statically. Just like every
> other PCI case on ARM. Just like Tegra. That is not the issue.
> 
> What is required is that the division of this space amongst the 10
> physical PCI-E links must be dynamic. Just like x86. Just like the new
> tegra driver. [1]
> 
> Is that clear?
Yes.
>> As background, PCIe enumeration in Linux usually works like:
>>
>> 1) You start off with some CPU physical address regions that generate
>> transactions on the PCIe bus.
>>
>> 2) You enumerate all the PCIe devices, and assign an address to each BAR
>> found, carved out of the PCIe address range corresponding to the regions
>> you knew from (1).
> 
> Step 2 also includes 'assign address windows to all the physical PCI-E
> links'. This is very important because it is what this entire
> discussion is about.
OK.
> Look at how tegra or x86 works, the CPU physical addresses for PCI-E
> do nothing until the PCI-to-PCI bridge window registers in each link's
> configuration space are setup. Until that is done the SOC doesn't know
> which link to send the transaction to.
>From my perspective, this is slightly the wrong way of describing the
issue, but I see what you mean:
At least on Tegra and I think x86, any transaction that goes to the
physical PCIe aperture is translated onto (internal) PCIe bus 0, so the
lack of window or PCIe/PCIe bridge BAR register programming doesn't
prevent the transaction going /somewhere/ (even if "somewhere" is only
half way to where it's useful!). The difference is pretty subtle. The
issue is that without the PCIe/PCIe bridge BARs programmed, the PCIe
transactions won't get off bus 0 and onto a downstream bus of one of the
PCIe/PCIe bridges, or put another way, no PCIe/PCIe bridge will claim the
transaction that happens on PCIe bus 0.
(Using "PCIe/PCIe bridge" above to mean "PCIe root port")
> Marvell is the same, until the link's window registers are setup the
> CPU addresses don't go anywhere.
> 
> Notice this has absolutely no effect on the host bridge aperture.
> This is a link-by-link configuration of what addresses *go down that
> link*.
Right.
> The big difference is the link window registers for Marvell do not
> conform to the PCI configuration space specification. They are Marvell
> specific.
> 
> This is what the glue code in the host driver does, it converts the
> Marvell specificness into something the kernel can undertstand and
> control. There are countless ways to do this, but please accept it
> is necessary that it be done...
Sure.
>> Now, I recall that a related issue was that you are tight on CPU
>> physical address space, and the second algorithm above would allow the
>> size of the PCIe controller's window configuration to be as small as
>> possible, and hence there would be more CPU physical address space
>> available to fit in other peripherals.
> 
> Physical address space is certainly a concern, but availability of
> decoder windows is the major one. Each link requires one decoder
> window for MMIO and one for IO, and possibly one for prefetch. The
> chip doesn't have 30 decoder windows. So the allocation of decoders to
> links must be dynamic, based on the requirements of the downstream
> endports on the link.
Oh I see...
I originally thought the issue was that the windows were between CPU
physical address space and the PCIe host controller itself. But in fact,
the windows are between PCIe bus 0 and the root ports, so they're the
equivalent of the standard PCIe root port (or PCIe/PCIe bridge) BAR
registers. And related, these BAR/window registers are usually part of
each PCIe root port itself, and hence there's a whole number dedicated
to each root port, but on Marvell there's a *global* pool of these
BARs/windows instead.
Now I think I finally understand the architecture of your HW.
>> However, why does this need to be dynamic? On a particular board, you
>> know all the other (non-PCIe) peripherals that you need to fit into the
>> CPU physical address space, so you know how much is left over for PCIe,
>> so why not always make the PCIe window fill up all the available
>> space,
> 
> Because there is no such thing as an all-links PCIe window on this
> hardware.
> 
> Each link has a seperate window.
> 
> If you get rid of all the dynamic allocation then every link must
> statically reserve some portion of physical address space and some
> number of decoder windows.
> 
> That more or less means you need to know what is going to be on the
> other side of every link when you write the DT.
So, the dynamic programming of the windows on Marvell HW is the exact
logical equivalent of programming a standard PCIe root port's BAR
registers. It makes perfect sense that should be dynamic. Presumably
this is something you can make work inside your emulated PCIe/PCIe
bridge module, simply by capturing writes to the BAR registers, and
translating them into writes to the Marvell window registers.
Now, I do have one follow-on question: You said you don't have 30
windows, but how many do you have free after allocating windows to any
other peripherals that need them, relative to (3 *
number-of-root-ports-in-the-SoC)? (3 being IO+Mem+PrefetchableMem.)
The thing here is that when the PCIe core writes to a root port BAR
window to configure/enable it the first time, you'll need to capture
that transaction and dynamically allocate a window and program it in a
way equivalent to what the BAR register write would have achieved on
standard HW. Later, the window might need resizing, or even to be
completely disabled, if the PCIe core were to change the standard BAR
register. Dynamically allocating a window when the BAR is written seems
a little heavy-weight.
So while it's obvious that window base address and size shouldn't be
static, I wonder if the assignment of a specific window ID to a specific
root port ID should be dynamic or static. For example, if your HW
configuration leaves you with 6 windows available, you could support 2
PCIe root ports by statically assigning 3 windows to serve each of those
2 root ports. Would that work, or are there systems where over-commit is
needed, e.g. if there's no IO space behind a root port, you could get
away with two windows per root port, and hence be able to run 3 root
ports rather than just 2? Still, if you know which PCIe devices are
behind the root ports, you could still represent the over-commit
statically in DT.
Still, I suppose doing it dynamically in the driver does end up being a
lot less to think about for someone creating the DT for a new board.
Having to translate standard root port BAR register writes to Marvell
window register allocation/writes would imply that the emulated root
port code has to be very closely tied into the Marvell PCIe driver, and
not something that could be at all generic in the most part.
>> With a static window configuration in DT, you'd end up with a system
>> that worked much like any x86 system or Tegra, with some static memory
>> range available to PCIe. It's just that in your case, the region
>> location/size could change from boot to boot based on DT, whereas it's
>> hard-coded in HW for Tegra and I assume x86 too.
> 
> How is this better? Now you have a system where you have to customize
> the DT before you connect a PCI-E device. What if someone uses this
> chip in a configuration with physical slots? How does that work? What
> about hotplug? What about a unified kernel? That is *not* like x86 or
> tegra.
Right. Now that I really understand what the windows are doing, I can
see that a static window configuration (address/size, perhaps rather
than which windows are used) would not be appropriate.
> IMHO Thomas's direction in his proposed driver ends up working very
> close to the new tegra driver, and has the sort of dynamic allocation
> and discovery people expect from PCI-E.
> 
> Jason
> 
> 1 - The new tegra driver switches from calling ARM's pci_common_init
>     once for every physical link, to once for the SOC. It does this by
>     fixing the routing of config transactions so that the kernel sees
>     the per-link PCI-PCI root port bridge config space provided by the
>     hardware at the correct place. By doing this it changes from
>     statically allocating a physical memory region for each link to
>     statically allocating a region for all the links, and dynamically
>     dividing that region amongst the links.
Right, we have both (or all 3) root ports show up in the same PCIe domain.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01  2:21       ` Stephen Warren
@ 2013-02-01  3:51         ` Jason Gunthorpe
  2013-02-01  9:03           ` Thomas Petazzoni
  2013-02-01  8:46         ` Thomas Petazzoni
  1 sibling, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-01  3:51 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Jan 31, 2013 at 07:21:02PM -0700, Stephen Warren wrote:
> I originally thought the issue was that the windows were between CPU
> physical address space and the PCIe host controller itself. But in fact,
> the windows are between PCIe bus 0 and the root ports, so they're
> the
Dunno what exactly is inside tegra, but on Marvell the docs describe
an internal bus cross bar/whatever and each PCI-E link gets a port
on that structure. There is no such physical thing as a 'PCI bus 0',
and they didn't arrange the hardware in a way that makes it easy for
the host driver to create one like tegra did :(
> equivalent of the standard PCIe root port (or PCIe/PCIe bridge) BAR
> registers. And related, these BAR/window registers are usually part of
> each PCIe root port itself, and hence there's a whole number dedicated
> to each root port, but on Marvell there's a *global* pool of these
> BARs/windows instead.
Right, that is all following the PCI-E spec, and is reasonable and
sane. What Marvell did is take an *end port core* and jam it on
their internal bus without adjusting things to follow the PCI-E spec
regarding config space and what not.
> > That more or less means you need to know what is going to be on the
> > other side of every link when you write the DT.
> 
> So, the dynamic programming of the windows on Marvell HW is the exact
> logical equivalent of programming a standard PCIe root port's BAR
> registers. It makes perfect sense that should be dynamic. Presumably
> this is something you can make work inside your emulated PCIe/PCIe
> bridge module, simply by capturing writes to the BAR registers, and
> translating them into writes to the Marvell window registers.
Yes, that is exactly the idea.
 
> Now, I do have one follow-on question: You said you don't have 30
> windows, but how many do you have free after allocating windows to any
> other peripherals that need them, relative to (3 *
> number-of-root-ports-in-the-SoC)? (3 being IO+Mem+PrefetchableMem.)
Thomas will have to answer this, it varies depending on the SOC, and
what other on chip peripherals are in use. For instance Kirkwood has
the same design but there are plenty of windows for the two PCI-E
links.
Still how would you even connect a limited number of regions on a link
by link basis to the common PCI code?
> The thing here is that when the PCIe core writes to a root port BAR
> window to configure/enable it the first time, you'll need to capture
> that transaction and dynamically allocate a window and program it in a
> way equivalent to what the BAR register write would have achieved on
> standard HW. Later, the window might need resizing, or even to be
> completely disabled, if the PCIe core were to change the standard
> BAR
Right. This is pretty straightforward except for the need to hook the
alignment fixup..
> register. Dynamically allocating a window when the BAR is written seems
> a little heavy-weight.
I think what Thomas had here was pretty small, and the windows need to
be shared with other on chip peripherals beyond PCI-E..
 
> needed, e.g. if there's no IO space behind a root port, you could get
> away with two windows per root port, and hence be able to run 3 root
> ports rather than just 2?
Right, this is the main point. If you plug in 3 devices and they all
only use MMIO regions then you only need to grab 3 windows. The kernel
disables the unused windows on the bridge so it is easy to tell when
they are disused.
> Still, I supose doing it dynamically in the driver does end up being a
> lot less to think about for someone creating the DT for a new board.
Agreed, these restrictions are all so HW specific, subtle and have
nothing to do with the PCI-E spec. Codifying them once in the driver
seems like the way to keep this craziness out of the PCI core and away
from users of the SOC.
 
> Having to translate standard root port BAR register writes to Marvell
> window register allocation/writes would imply that the emulated root
> port code has to be very closely tied into the Marvell PCIe driver, and
> not something that could be at all generic in the most part.
Agreed.. At the very least generic code would need call back
functions to the driver... It has a fair bit to do for Marvell:
 - Translate MMIO, prefetch and IO ranges to mbus windows
 - Keep track of the secondary/subordinate bus numbers and fiddle
   with other hardware registers to set those up
 - Copy the link state/control registers from the end port config
   space into the bridge express root port capability
 - Probably ditto for AER as well..
Probably simpler just to make one for marvell than mess excessively
with callbacks..
Cheers,
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01  3:51         ` Jason Gunthorpe
@ 2013-02-01  9:03           ` Thomas Petazzoni
  2013-02-01 16:07             ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-01  9:03 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Jason Gunthorpe,
Thanks again for continuing this discussion while I was sleeping :-)
On Thu, 31 Jan 2013 20:51:15 -0700, Jason Gunthorpe wrote:
> > Now, I do have one follow-on question: You said you don't have 30
> > windows, but how many do you have free after allocating windows to
> > any other peripherals that need them, relative to (3 *
> > number-of-root-ports-in-the-SoC)? (3 being IO+Mem+PrefetchableMem.)
> 
> Thomas will have to answer this, it varies depending on the SOC, and
> what other on chip peripherals are in use. For instance Kirkwood has
> the same design but there are plenty of windows for the two PCI-E
> links.
Right. I already answered this point directly to Stephen. On Kirkwood,
there are many windows and two PCIe links, so the windows were
statically allocated. On Armada XP, there are 20 windows and 10 PCIe
links. Static allocation is no longer reasonable.
> > The thing here is that when the PCIe core writes to a root port BAR
> > window to configure/enable it the first time, you'll need to capture
> > that transaction and dynamically allocate a window and program it
> > in a way equivalent to what the BAR register write would have
> > achieved on standard HW. Later, the window might need resizing, or
> > even to be completely disabled, if the PCIe core were to change the
> > standard BAR
> 
> Right. This is pretty straightforward except for the need to hook the
> alignment fixup..
> 
> > register. Dynamically allocating a window when the BAR is written
> > seems a little heavy-weight.
> 
> I think what Thomas had here was pretty small, and the windows need to
> be shared with other on chip periphals beyond PCI-E..
Yes, it is not very complicated. We already have some common code that
creates/removes those windows, so it is just a matter of calling the
right thing at the right time. Definitely not hundreds of lines of crap.
> Right, this is the main point. If you plug in 3 devices and they all
> only use MMIO regions then you only need to grab 3 windows. The kernel
> disables the unused windows on the bridge so it is easy to tell when
> they are disused.
Ah, I'm interested in further discussing this. I currently have a setup
with one SATA PCIe card and one NIC PCIe card. On the NIC, the I/O
ports are said to be "disabled", but still an I/O region gets allocated
in the PCI-to-PCI bridge that gives access to this particular device.
The device in question is:
05:00.0 Ethernet controller: Intel Corporation 82572EI Gigabit Ethernet Controller (Copper) (rev 06)
	Subsystem: Intel Corporation PRO/1000 PT Server Adapter
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx-
	Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
	Latency: 0, Cache Line Size: 64 bytes
	Interrupt: pin A routed to IRQ 106
	Region 0: Memory at c1200000 (32-bit, non-prefetchable) [size=128K]
	Region 1: Memory at c1220000 (32-bit, non-prefetchable) [size=128K]
	Region 2: I/O ports at c0010000 [disabled] [size=32]
	[virtual] Expansion ROM@c1300000 [disabled] [size=128K]
So the Region 2 is disabled. But, in the corresponding bridge:
00:05.0 PCI bridge: Marvell Technology Group Ltd. Device 1092 (prog-if 00 [Normal decode])
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr+ Stepping- SERR+ FastB2B- DisINTx-
	Status: Cap+ 66MHz+ UDF+ FastB2B+ ParErr+ DEVSEL=?? >TAbort+ <TAbort+ <MAbort+ >SERR+ <PERR+ INTx+
	Latency: 0, Cache Line Size: 64 bytes
	Bus: primary=00, secondary=05, subordinate=05, sec-latency=0
	I/O behind bridge: c0010000-c001ffff
	Memory behind bridge: c1200000-c12fffff
	Prefetchable memory behind bridge: c1300000-c13fffff
So there is really a range of I/O addresses associated to it, even
though the device will apparently not use it. Would it be possible to
detect that the I/O range is not used by the device, and therefore
avoid the allocation of an address decoding window for this I/O range?
> Agreed.. At the very least generic code would need call back
> functions to the driver... It has a fair bit to do for Marvell:
>  - Translate MMIO, prefetch and IO ranges to mbus windows
>  - Keep track of the secondary/subordinate bus numbers and fiddle
>    with other hardware registers to set those up
>  - Copy the link state/control regsiters from the end port config
>    space into the bridge express root port capability
>  - Probably ditto for AER as well..
> 
> Probably simpler just to make one for marvell then mess excessively
> with callbacks..
As replied to Stephen, I've chosen to bring the PCI-to-PCI bridge
emulation code directly into the driver, specifically for this reason.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01  9:03           ` Thomas Petazzoni
@ 2013-02-01 16:07             ` Arnd Bergmann
  2013-02-01 16:26               ` Russell King - ARM Linux
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-01 16:07 UTC (permalink / raw)
  To: linux-arm-kernel
On Friday 01 February 2013, Thomas Petazzoni wrote:
> So there is really a range of I/O addresses associated to it, even
> though the device will apparently not use it. Would it be possible to
> detect that the I/O range is not used by the device, and therefore
> avoid the allocation of an address decoding window for this I/O range?
I suspect it just gets disabled because the port number 0xc0010000 is
larger than IO_PORT_LIMIT and we cannot access that offset inside
of the virtual memory window we use for PIO.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01 16:07             ` Arnd Bergmann
@ 2013-02-01 16:26               ` Russell King - ARM Linux
  2013-02-01 17:45                 ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Russell King - ARM Linux @ 2013-02-01 16:26 UTC (permalink / raw)
  To: linux-arm-kernel
On Fri, Feb 01, 2013 at 04:07:49PM +0000, Arnd Bergmann wrote:
> On Friday 01 February 2013, Thomas Petazzoni wrote:
> > So there is really a range of I/O addresses associated to it, even
> > though the device will apparently not use it. Would it be possible to
> > detect that the I/O range is not used by the device, and therefore
> > avoid the allocation of an address decoding window for this I/O range?
> 
> I suspect it just gets disabled because the port number 0xc0010000 is
> larger than IO_PORT_LIMIT and we cannot access that offset inside
> of the virtual memory window we use for PIO.
You're running into that trap again which you fall into on other
architectures.
If you arrange for your PCI IO space to start at 0 rather than the
physical address that it appears on your CPU, then you shouldn't
end up with it extending up to something at 0xcXXXXXXX.
Remember that we should be ensuring that inb(0) hits the first address
of the cross-subarch PCI IO area - this always requires that any sub-arch
taking part in a multiplatform kernel must start its IO space addresses
at 0 and not the physical address on the local CPU.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01 16:26               ` Russell King - ARM Linux
@ 2013-02-01 17:45                 ` Arnd Bergmann
  2013-02-01 19:58                   ` Jason Gunthorpe
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-01 17:45 UTC (permalink / raw)
  To: linux-arm-kernel
On Friday 01 February 2013, Russell King - ARM Linux wrote:
> On Fri, Feb 01, 2013 at 04:07:49PM +0000, Arnd Bergmann wrote:
> > On Friday 01 February 2013, Thomas Petazzoni wrote:
> > > So there is really a range of I/O addresses associated to it, even
> > > though the device will apparently not use it. Would it be possible to
> > > detect that the I/O range is not used by the device, and therefore
> > > avoid the allocation of an address decoding window for this I/O range?
> > 
> > I suspect it just gets disabled because the port number 0xc0010000 is
> > larger than IO_PORT_LIMIT and we cannot access that offset inside
> > of the virtual memory window we use for PIO.
> 
> You're running into that trap again which you fall into on other
> architectures.
> 
> If you arrange for your PCI IO space to start at 0 rather than the
> physical address that it appears on your CPU, then you shouldn't
> end up with it extending up to something at 0xcXXXXXXX.
> 
> Remember that we should be ensuring that inb(0) hits the first address
> of the cross-subarch PCI IO area - this alway requires that any sub-arch
> taking part in a multiplatform kernel must start its IO space addresses
> at 0 and not the physical address on the local CPU.
Yes, that was my point. I think in this case, the bug is in the new
of_pci_process_ranges function, which returns a 'struct resource'
translated into IORESOURCE_MEM space, but with the type set
to IORESOURCE_IO. This resource then gets passed to 
pci_add_resource_offset().
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01 17:45                 ` Arnd Bergmann
@ 2013-02-01 19:58                   ` Jason Gunthorpe
  0 siblings, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-01 19:58 UTC (permalink / raw)
  To: linux-arm-kernel
On Fri, Feb 01, 2013 at 05:45:29PM +0000, Arnd Bergmann wrote:
> Yes, that was my point. I think in this case, the bug is in the new
> of_pci_process_ranges functions, which returns a 'struct resource'
> translated into IORESOURCE_MEM space, but with the type set
> to IORESOURCE_IO. This resource then gets passed to 
> pci_add_resource_offset().
A standard way to express the required address translation from CPU
physical address to IO bus address might help other people avoid this
trap??
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01  2:21       ` Stephen Warren
  2013-02-01  3:51         ` Jason Gunthorpe
@ 2013-02-01  8:46         ` Thomas Petazzoni
  2013-02-01 16:02           ` Arnd Bergmann
  2013-02-01 17:57           ` Stephen Warren
  1 sibling, 2 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-01  8:46 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Stephen Warren,
Thanks for this great discussion. I see that Jason has already answered
most of the questions on the why we need this "dynamicity" in the window
configuration. A few comments below.
On Thu, 31 Jan 2013 19:21:02 -0700, Stephen Warren wrote:
> So, the dynamic programming of the windows on Marvell HW is the exact
> logical equivalent of programming a standard PCIe root port's BAR
> registers. It makes perfect sense that should be dynamic. Presumably
> this is something you can make work inside your emulated PCIe/PCIe
> bridge module, simply by capturing writes to the BAR registers, and
> translating them into writes to the Marvell window registers.
That's what I'm doing. In the PATCHv2, my PCIe host driver was reading
back the BARs in the PCI-to-PCI bridges configuration space, and was
setting up the windows according to the addresses that had been
assigned to each bridge.
I am currently working on making this more dynamic: it is directly when
the BAR is being written to in the PCI-to-PCI bridge configuration
space that a window will be setup.
> Now, I do have one follow-on question: You said you don't have 30
> windows, but how many do you have free after allocating windows to any
> other peripherals that need them, relative to (3 *
> number-of-root-ports-in-the-SoC)? (3 being IO+Mem+PrefetchableMem.)
We have 20 windows on Armada XP if I remember correctly, and they are
not only used for PCIe, but also to map the BootROM (needed to boot
secondary CPUs), to map SPI flashes or NOR flashes, for example. So
they are really shared between many uses. In terms of PCIe, there are
only two types of windows: I/O and Memory, there is no notion of
Prefetchable Memory window as far as I could see.
We have up to 10 PCIe interfaces, and only 20 windows. It means that
you basically can't use all PCIe interfaces, there will necessarily be
some limit, due to the limited number of windows.
Also, I'd like to point out that the dynamic configuration is needed
for two reasons:
 * The number of windows, as we are discussing now.
 * The amount of physical address space available. If you don't
   dynamically configure those windows, then you have to account the
   "worst case", i.e the PCIe devices that require very large memory
   areas. So you end up creating static windows that reserve 32M or 64M
   or 128M *per* PCIe link. You can see that it "consumes" pretty
   quickly a large part of the 4G physical address space that we have.
   Thanks to the dynamic window configuration that we do with the
   PCI-to-PCI bridge, we can size the windows exactly the size needed
   by the downstream device on each PCIe interface.
> The thing here is that when the PCIe core writes to a root port BAR
> window to configure/enable it the first time, you'll need to capture
> that transaction and dynamically allocate a window and program it in a
> way equivalent to what the BAR register write would have achieved on
> standard HW. Later, the window might need resizing, or even to be
> completely disabled, if the PCIe core were to change the standard BAR
> register. Dynamically allocating a window when the BAR is written
> seems a little heavy-weight.
Why?
> So while it's obvious that window base address and size shouldn't be
> static, I wonder if the assignment of a specific window ID to a
> specific root port ID shouldn be dynamic or static. For example, if
> your HW configuration leaves you with 6 windows available, you could
> support 2 PCIe root ports by statically assigning 3 windows to serve
> each of those 2 root ports. Would that work, or are there systems
> where over-commit is needed, e.g. if there's no IO space behind a
> root port, you could get away with two windows per root port, and
> hence be able to run 3 root ports rather than just 2? Still, if you
> know which PCIe devices are being the root ports, you could still
> represent the over-commit statically in DT
For now, I haven't figured out how not to allocate an I/O window if the
downstream device doesn't use I/O, but I'd like to achieve that, as it
would save one of the two windows needed per PCIe interface... and many
PCIe devices don't need the I/O window.
> Still, I supose doing it dynamically in the driver does end up being a
> lot less to think about for someone creating the DT for a new board.
Indeed.
> Having to translate standard root port BAR register writes to Marvell
> window register allocation/writes would imply that the emulated root
> port code has to be very closely tied into the Marvell PCIe driver,
> and not something that could be at all generic in the most part.
Right. I've already moved the PCI-to-PCI bridge code from the generic
drivers/pci/sw-pci-pci-bridge.c location to be directly into the
driver. It also allows integrating things like window allocation more tightly.
> Right. Now that I really understand what the windows are doing, I can
> see that a static window configuration (address/size, perhaps rather
> than windows are used) would not be appropriate.
Glad to see we reached the same conclusion :-)
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01  8:46         ` Thomas Petazzoni
@ 2013-02-01 16:02           ` Arnd Bergmann
  2013-02-01 17:57           ` Stephen Warren
  1 sibling, 0 replies; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-01 16:02 UTC (permalink / raw)
  To: linux-arm-kernel
On Friday 01 February 2013, Thomas Petazzoni wrote:
> > So while it's obvious that window base address and size shouldn't be
> > static, I wonder if the assignment of a specific window ID to a
> > specific root port ID shouldn be dynamic or static. For example, if
> > your HW configuration leaves you with 6 windows available, you could
> > support 2 PCIe root ports by statically assigning 3 windows to serve
> > each of those 2 root ports. Would that work, or are there systems
> > where over-commit is needed, e.g. if there's no IO space behind a
> > root port, you could get away with two windows per root port, and
> > hence be able to run 3 root ports rather than just 2? Still, if you
> > know which PCIe devices are being the root ports, you could still
> > represent the over-commit statically in DT
> 
> For now, I haven't figured out how not to allocate an I/O window if the
> downstream device doesn't use I/O, but I'd like to achieve that, as it
> would save one of the two windows needed per PCIe interface... and many
> PCIe devices don't need the I/O window.
The easiest hack would be to only ever allow one I/O window on exactly
one of the ports, and not do PIO on the other ports at all. Given the
various troubles of making any other combination work, that sounds like
a good enough compromise to me. A lot of the add-on cards would not
work in the remaining ports anyway, and worrying too much about that
legacy hardware may just not be worth it. That way, you only need 11
windows for PCIe (10*mem, 1*IO), which will always fit.
Do you actually have /any/ PCIe cards with PIO BARs that you can test
with?
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01  8:46         ` Thomas Petazzoni
  2013-02-01 16:02           ` Arnd Bergmann
@ 2013-02-01 17:57           ` Stephen Warren
  2013-02-01 19:39             ` Jason Gunthorpe
  1 sibling, 1 reply; 216+ messages in thread
From: Stephen Warren @ 2013-02-01 17:57 UTC (permalink / raw)
  To: linux-arm-kernel
On 02/01/2013 01:46 AM, Thomas Petazzoni wrote:
> Dear Stephen Warren,
> 
> Thanks for this great discussion. I see that Jason has already answered
> most of the questions on the why we need this "dynamicity" in the window
> configuration. A few comments below.
> 
> On Thu, 31 Jan 2013 19:21:02 -0700, Stephen Warren wrote:
> 
>> So, the dynamic programming of the windows on Marvell HW is the exact
>> logical equivalent of programming a standard PCIe root port's BAR
>> registers. It makes perfect sense that should be dynamic. Presumably
>> this is something you can make work inside your emulated PCIe/PCIe
>> bridge module, simply by capturing writes to the BAR registers, and
>> translating them into writes to the Marvell window registers.
> 
> That's what I'm doing. In the PATCHv2, my PCIe host driver was reading
> back the BARs in the PCI-to-PCI bridges configuration space, and was
> setting up the windows according to the addresses that had been
> assigned to each bridge.
> 
> I am currently working in making this more dynamic: it is directly when
> the BAR is being written to in the PCI-to-PCI bridge configuration
> space that a window will be setup.
> 
>> Now, I do have one follow-on question: You said you don't have 30
>> windows, but how many do you have free after allocating windows to any
>> other peripherals that need them, relative to (3 *
>> number-of-root-ports-in-the-SoC)? (3 being IO+Mem+PrefetchableMem.)
> 
> We have 20 windows on Armada XP if I remember correctly, and they are
> not only used for PCIe, but also to map the BootROM (needed to boot
> secondary CPUs), to map SPI flashes or NOR flashes, for example. So
> they are really shared between many uses. In terms of PCIe, there are
> only two types of windows: I/O and Memory, there is no notion of
> Prefetchable Memory window as far as I could see.
In Tegra, we end up having separate MMIO vs. Prefetchable MMIO chunks of
our overall PCIe aperture. However, the HW setup appears the same for
both of those. I'm not sure if it's a bug in the driver, or if it's just
to separate the two address spaces so that the page tables can be
configured for those two regions with large rather than small
granularity. I need to go investigate that.
> We have up to 10 PCIe interfaces, and only 20 windows. It means that
> you basically can't use all PCIe interfaces, there will necessarily be
> some limit, due to the limited number of windows.
So there are 10 PCIe interfaces (root ports). That's on the SoC itself,
right? Are all 10 (or a large number of them) actually used at once on
any given board design? I suppose this must be the case, or Marvell
wouldn't have wasted the silicon space on 10 root ports... Still, that's
a rather large number of ports!
If only a few PCIe ports are ever in use at once on a design and/or the
PCIe ports generally contain soldered-down devices rather than
user-accessible ports, then statically assigning window *IDs* to
individual ports would make for easier code in the driver, since the BAR
register emulation would never have to allocate/de-allocate windows, but
rather only ever have to enable/disable/configure them.
However, if many PCIe ports are in use at once and there are
user-accessible ports, you can't know ahead of time which ports will
need MMIO vs. MMIO prefetch vs. IO, so you'd have to dynamically
allocate window IDs to ports, in addition to dynamically setting up the
address/size of windows.
> Also, I'd like to point out that the dynamic configuration is needed
> for two reasons:
> 
>  * The number of windows, as we are discussing now.
OK.
>  * The amount of physical address space available. If you don't
>    dynamically configure those windows, then you have to account the
>    "worst case", i.e the PCIe devices that require very large memory
>    areas. So you end up creating static windows that reserve 32M or 64M
>    or 128M *per* PCIe link. You can see that it "consumes" pretty
>    quickly a large part of the 4G physical address space that we have.
>    Thanks to the dynamic window configuration that we do with the
>    PCI-to-PCI bridge, we can size the windows exactly the size needed
>    by the downstream device on each PCIe interface.
That aspect is applicable to any PCIe system; there's always some chunk
of physical address space that maps to PCIe, and which must be divided
into per-root-port chunks.
I think the only difference on the Marvell HW is:
* The overall total size of the physical address space is dynamic rather
than fixed, because it's programmed through windows rather than
hard-coded into HW.
* Hence, the windows /both/ define the physical address space layout
/and/ define the routing of transactions to individual root ports. On
regular PCIe, the root port BARs only divide up the overall physical
(PCIe bus 0 really) address space and hence perform routing; they have
no influence over the CPU physical address space.
So I think the crux of the problem is that you really have 10 PCIe root
ports, each of which is nominally a separate PCIe domain (since the
windows connect CPU physical address space to an individual/specific
root port's PCIe address space, rather than having separate connections
from CPU -> PCIe bus 0 address space, then BARs/windows connecting PCIe
bus 0 address space to individual root port/subordinate bus address
space), but you're attempting to treat all 10 ports as a single PCIe
domain so that you don't have to dedicate separate physical CPU address
space to each port, which you would have to do if they were actually
treated as separate domains.
>> The thing here is that when the PCIe core writes to a root port BAR
>> window to configure/enable it the first time, you'll need to capture
>> that transaction and dynamically allocate a window and program it in a
>> way equivalent to what the BAR register write would have achieved on
>> standard HW. Later, the window might need resizing, or even to be
>> completely disabled, if the PCIe core were to change the standard BAR
>> register. Dynamically allocating a window when the BAR is written
>> seems a little heavy-weight.
> 
> Why?
Well, it's just a bunch more code; much more than a simple writel().
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01 17:57           ` Stephen Warren
@ 2013-02-01 19:39             ` Jason Gunthorpe
  2013-02-01 20:30               ` Stephen Warren
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-01 19:39 UTC (permalink / raw)
  To: linux-arm-kernel
On Fri, Feb 01, 2013 at 10:57:20AM -0700, Stephen Warren wrote:
> > We have 20 windows on Armada XP if I remember correctly, and they are
> > not only used for PCIe, but also to map the BootROM (needed to boot
> > secondary CPUs), to map SPI flashes or NOR flashes, for example. So
> > they are really shared between many uses. In terms of PCIe, there are
> > only two types of windows: I/O and Memory, there is no notion of
> > Prefetchable Memory window as far as I could see.
> 
> In Tegra, we end up having separate MMIO vs. Prefetchable MMIO
> chunks of our overall PCIe aperture. However, the HW setup appears
> the same for both of those. I'm not sure if it's a bug in the
> driver, or if it's just to separate the two address spaces so that
> the page tables can be configured for those two regions with large
> rather than small granularity. I need to go investigate that.
The only purpose of prefetchable space is for legacy PCI. When a P2P
bridge targets legacy PCI it has different behavior for its
prefetchable memory window compared to the non-prefetchable memory
window.
IIRC (though it has been a long time since I looked really close at
this) PCI-X and PCI-E did away with this special bridge behaviour but
kept the prefetchable memory space for compatibility.
These days it is typically used to mark cachable memory on an end
device.
From a SOC perspective, there is no need to treat MMIO and prefetch
areas any differently. ARM's per-page cachability flags can be used to
deal with the differing caching requirements.
However, the bus tree downstream of each root port will require the
prefetch window to be contiguous. On Marvell, today, this means you
need to burn two mbus windows to get this. If the Linux pci core could
allocate the prefetch space for each root port bridge contiguously
with the mmio space for the same root port then this could be reduced
to one window covering both spaces for the port.
> So there are 10 PCIe interfaces (root ports). That's on the SoC itself
> right. Are all 10 (or a large number of them) actually used at once on
> any given board design? I suppose this must be the case, or Marvell
> wouldn't have wasted the silicon space on 10 root ports... Still, that's
> a rather large number of ports!
Agreed.. I have no idea what the target is for this..
 
> I think the only difference on the Marvell HW is:
> 
> * The overall total size of the physical address space is dynamic rather
> than fixed, because it's programmed through windows rather than
> hard-coded into HW.
Is it hard coded on tegra? I thought there was a register set that was
used to set the overall PCI-E MMIO window location and size. I know
even on x86 the PCI window is set via register, though that typically
isn't disclosed except to bios writers.. 
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems
  2013-02-01 19:39             ` Jason Gunthorpe
@ 2013-02-01 20:30               ` Stephen Warren
  0 siblings, 0 replies; 216+ messages in thread
From: Stephen Warren @ 2013-02-01 20:30 UTC (permalink / raw)
  To: linux-arm-kernel
On 02/01/2013 12:39 PM, Jason Gunthorpe wrote:
> On Fri, Feb 01, 2013 at 10:57:20AM -0700, Stephen Warren wrote:
...
>> I think the only difference on the Marvell HW is:
>>
>> * The overall total size of the physical address space is dynamic rather
>> than fixed, because it's programmed through windows rather than
>> hard-coded into HW.
> 
> Is it hard coded on tegra? I thought there was a register set that was
> used to set the overall PCI-E MMIO window location and size. I know
> even on x86 the PCI window is set via register, though that typically
> isn't disclosed except to bios writers.. 
There is a fixed (in HW) 1 GiB physical address window dedicated to
PCIe. That window is divided between host controller registers, PCIe
root port registers (since our root ports don't respond to configuration
transactions), and regular PCIe accesses; config/MMIO/IO. There are
registers in the host controller that configure the division of this
space into config/MMIO/IO, so that can be dynamic. The DT bindings for
the driver Thierry proposed hard-code those divisions in DT.
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
 
 
 
- * [PATCH v2 20/27] arm: mvebu: PCIe support is now available on mvebu
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (18 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 19/27] pci: PCIe driver for Marvell Armada 370/XP systems Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 21/27] arm: mvebu: add PCIe Device Tree informations for Armada 370 Thomas Petazzoni
                   ` (6 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
Now that the PCIe driver for mvebu has been integrated along with all
its relevant dependencies, we can mark the ARCH_MVEBU platform as
MIGHT_HAVE_PCI, which allows PCI bus support to be selected if needed.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/mach-mvebu/Kconfig |    2 ++
 1 file changed, 2 insertions(+)
diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig
index 440b13e..f12e475 100644
--- a/arch/arm/mach-mvebu/Kconfig
+++ b/arch/arm/mach-mvebu/Kconfig
@@ -13,6 +13,8 @@ config ARCH_MVEBU
 	select MVEBU_CLK_CORE
 	select MVEBU_CLK_CPU
 	select MVEBU_CLK_GATING
+	select MIGHT_HAVE_PCI
+	select PCI_QUIRKS if PCI
 
 if ARCH_MVEBU
 
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 21/27] arm: mvebu: add PCIe Device Tree informations for Armada 370
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (19 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 20/27] arm: mvebu: PCIe support is now available on mvebu Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP Thomas Petazzoni
                   ` (5 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The Armada 370 SoC has two 1x PCIe 2.0 interfaces, so we add the
necessary Device Tree information to make these interfaces available.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/boot/dts/armada-370.dtsi |   42 +++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi
index 636cf7d..a66e371 100644
--- a/arch/arm/boot/dts/armada-370.dtsi
+++ b/arch/arm/boot/dts/armada-370.dtsi
@@ -132,5 +132,47 @@
 				dmacap,memset;
 			};
 		};
+
+		pcie-controller {
+			compatible = "marvell,armada-370-xp-pcie";
+			status = "disabled";
+
+			#address-cells = <3>;
+			#size-cells = <2>;
+
+			bus-range = <0x00 0xff>;
+
+			ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
+				  0x00001000 0 0xd0080000 0xd0080000 0 0x00002000   /* port 1.0 registers */
+				  0x81000000 0 0	  0xc0000000 0 0x00010000   /* downstream I/O */
+				  0x82000000 0 0	  0xc1000000 0 0x08000000>; /* non-prefetchable memory */
+
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0xf800 0 0 1>;
+			interrupt-map = <0x0800 0 0 1 &mpic 58 /* port 0.0 */
+					 0x1000 0 0 1 &mpic 62>; /* port 1.0 */
+
+			pcie@0,0 {
+				device_type = "pciex";
+				reg = <0x0800 0 0xd0040000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <0>;
+				clocks = <&gateclk 5>;
+				status = "disabled";
+			};
+
+			pcie@1,0 {
+				device_type = "pciex";
+				reg = <0x1000 0 0xd0080000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <1>;
+				marvell,pcie-lane = <0>;
+				clocks = <&gateclk 9>;
+				status = "disabled";
+			};
+		};
 	};
 };
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (20 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 21/27] arm: mvebu: add PCIe Device Tree informations for Armada 370 Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-02-06 22:41   ` Arnd Bergmann
  2013-01-28 18:56 ` [PATCH v2 23/27] arm: mvebu: PCIe Device Tree informations for OpenBlocks AX3-4 Thomas Petazzoni
                   ` (4 subsequent siblings)
  26 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The Armada XP SoCs have multiple PCIe interfaces. The MV78230 has 2
PCIe units (one 4x or quad 1x, the other 1x only), the MV78260 has 3
PCIe units (two 4x or quad 1x and one 4x/1x), the MV78460 has 4 PCIe
units (two 4x or quad 1x and two 4x/1x). We therefore add the
necessary Device Tree information to make those PCIe interfaces
usable.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/boot/dts/armada-xp-mv78230.dtsi |   92 ++++++++++++++++++
 arch/arm/boot/dts/armada-xp-mv78260.dtsi |  105 +++++++++++++++++++++
 arch/arm/boot/dts/armada-xp-mv78460.dtsi |  150 ++++++++++++++++++++++++++++++
 3 files changed, 347 insertions(+)
diff --git a/arch/arm/boot/dts/armada-xp-mv78230.dtsi b/arch/arm/boot/dts/armada-xp-mv78230.dtsi
index e041f42..6abb0ff 100644
--- a/arch/arm/boot/dts/armada-xp-mv78230.dtsi
+++ b/arch/arm/boot/dts/armada-xp-mv78230.dtsi
@@ -70,5 +70,97 @@
 			#interrupts-cells = <2>;
 			interrupts = <87>, <88>, <89>;
 		};
+
+		/*
+		 * MV78230 has 2 PCIe units Gen2.0: One unit can be
+		 * configured as x4 or quad x1 lanes. One unit is
+		 * x4/x1.
+		 */
+		pcie-controller {
+			compatible = "marvell,armada-370-xp-pcie";
+			status = "disabled";
+
+			#address-cells = <3>;
+			#size-cells = <2>;
+
+
+			bus-range = <0x00 0xff>;
+
+			ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
+			          0x00004800 0 0xd0042000 0xd0042000 0 0x00002000   /* port 2.0 registers */
+			          0x00001000 0 0xd0044000 0xd0044000 0 0x00002000   /* port 0.1 registers */
+			          0x00001800 0 0xd0048000 0xd0048000 0 0x00002000   /* port 0.2 registers */
+			          0x00002000 0 0xd004C000 0xd004C000 0 0x00002000   /* port 0.3 registers */
+				  0x81000000 0 0	  0xc0000000 0 0x00010000   /* downstream I/O */
+				  0x82000000 0 0	  0xc1000000 0 0x08000000>; /* non-prefetchable memory */
+
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0xf800 0 0 1>;
+			interrupt-map = <0x0800 0 0 1 &mpic 58 1
+				         0x1000 0 0 1 &mpic 59 1
+					 0x1800 0 0 1 &mpic 60 1
+					 0x2000 0 0 1 &mpic 61 1
+					 0x4800 0 0 1 &mpic 99 1>;
+
+			pcie@0,0 {
+				device_type = "pciex";
+				reg = <0x0800 0 0xd0040000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <0>;
+				interrupts = <1>;
+				clocks = <&gateclk 5>;
+				status = "disabled";
+			};
+
+			pcie@0,1 {
+				device_type = "pciex";
+				reg = <0x1000 0 0xd0044000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <1>;
+				interrupts = <1>;
+				clocks = <&gateclk 6>;
+				status = "disabled";
+			};
+
+			pcie@0,2 {
+				device_type = "pciex";
+				reg = <0x1800 0 0xd0048000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <2>;
+				interrupts = <1>;
+				clocks = <&gateclk 7>;
+				status = "disabled";
+			};
+
+			pcie@0,3 {
+				device_type = "pciex";
+				reg = <0x2000 0 0xd004C000 0 0xC000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <3>;
+				interrupts = <1>;
+				clocks = <&gateclk 8>;
+				status = "disabled";
+			};
+
+			pcie@2,0 {
+				device_type = "pciex";
+				reg = <0x4800 0 0xd0042000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <2>;
+				marvell,pcie-lane = <0>;
+				interrupts = <1>;
+				clocks = <&gateclk 26>;
+				status = "disabled";
+			};
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
index 9e23bd8..ab8c593 100644
--- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi
+++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi
@@ -90,5 +90,110 @@
 				clocks = <&gateclk 1>;
 				status = "disabled";
 		};
+
+		/*
+		 * MV78260 has 3 PCIe units Gen2.0: Two units can be
+		 * configured as x4 or quad x1 lanes. One unit is
+		 * x4/x1.
+		 */
+		pcie-controller {
+			compatible = "marvell,armada-370-xp-pcie";
+			status = "disabled";
+
+			#address-cells = <3>;
+			#size-cells = <2>;
+
+			bus-range = <0x00 0xff>;
+
+			ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
+			          0x00004800 0 0xd0042000 0xd0042000 0 0x00002000   /* port 2.0 registers */
+			          0x00001000 0 0xd0044000 0xd0044000 0 0x00002000   /* port 0.1 registers */
+			          0x00001800 0 0xd0048000 0xd0048000 0 0x00002000   /* port 0.2 registers */
+			          0x00002000 0 0xd004C000 0xd004C000 0 0x00002000   /* port 0.3 registers */
+			          0x00005000 0 0xd0082000 0xd0082000 0 0x00002000   /* port 3.0 registers */
+				  0x81000000 0 0	  0xc0000000 0 0x00010000   /* downstream I/O */
+				  0x82000000 0 0	  0xc1000000 0 0x08000000>; /* non-prefetchable memory */
+
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0xf800 0 0 1>;
+			interrupt-map = <0x0800 0 0 1 &mpic 58 1
+				         0x1000 0 0 1 &mpic 59 1
+					 0x1800 0 0 1 &mpic 60 1
+					 0x2000 0 0 1 &mpic 61 1
+					 0x4800 0 0 1 &mpic 99 1
+					 0x5000 0 0 1 &mpic 103 1>;
+
+			pcie@0,0 {
+				device_type = "pciex";
+				reg = <0x0800 0 0xd0040000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <0>;
+				interrupts = <1>;
+				clocks = <&gateclk 5>;
+				status = "disabled";
+			};
+
+			pcie@0,1 {
+				device_type = "pciex";
+				reg = <0x1000 0 0xd0044000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <1>;
+				interrupts = <1>;
+				clocks = <&gateclk 6>;
+				status = "disabled";
+			};
+
+			pcie@0,2 {
+				device_type = "pciex";
+				reg = <0x1800 0 0xd0048000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <2>;
+				interrupts = <1>;
+				clocks = <&gateclk 7>;
+				status = "disabled";
+			};
+
+			pcie@0,3 {
+				device_type = "pciex";
+				reg = <0x2000 0 0xd004C000 0 0xC000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <3>;
+				interrupts = <1>;
+				clocks = <&gateclk 8>;
+				status = "disabled";
+			};
+
+			pcie@2,0 {
+				device_type = "pciex";
+				reg = <0x4800 0 0xd0042000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <2>;
+				marvell,pcie-lane = <0>;
+				interrupts = <1>;
+				clocks = <&gateclk 26>;
+				status = "disabled";
+			};
+
+			pcie@3,0 {
+				device_type = "pciex";
+				reg = <0x5000 0 0xd0082000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <3>;
+				marvell,pcie-lane = <0>;
+				interrupts = <1>;
+				clocks = <&gateclk 27>;
+				status = "disabled";
+			};
+		};
 	};
 };
diff --git a/arch/arm/boot/dts/armada-xp-mv78460.dtsi b/arch/arm/boot/dts/armada-xp-mv78460.dtsi
index 9659661..00c69aa 100644
--- a/arch/arm/boot/dts/armada-xp-mv78460.dtsi
+++ b/arch/arm/boot/dts/armada-xp-mv78460.dtsi
@@ -105,5 +105,155 @@
 				clocks = <&gateclk 1>;
 				status = "disabled";
 		};
+
+		/*
+		 * MV78460 has 4 PCIe units Gen2.0: Two units can be
+		 * configured as x4 or quad x1 lanes. Two units are
+		 * x4/x1.
+		 */
+		pcie-controller {
+			compatible = "marvell,armada-370-xp-pcie";
+			status = "disabled";
+
+			#address-cells = <3>;
+			#size-cells = <2>;
+
+			bus-range = <0x00 0xff>;
+
+			ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
+			          0x00004800 0 0xd0042000 0xd0042000 0 0x00002000   /* port 2.0 registers */
+			          0x00001000 0 0xd0044000 0xd0044000 0 0x00002000   /* port 0.1 registers */
+			          0x00001800 0 0xd0048000 0xd0048000 0 0x00002000   /* port 0.2 registers */
+			          0x00002000 0 0xd004C000 0xd004C000 0 0x00002000   /* port 0.3 registers */
+				  0x00002800 0 0xd0080000 0xd0080000 0 0x00002000   /* port 1.0 registers */
+			          0x00005000 0 0xd0082000 0xd0082000 0 0x00002000   /* port 3.0 registers */
+				  0x00003000 0 0xd0084000 0xd0084000 0 0x00002000   /* port 1.1 registers */
+				  0x00003800 0 0xd0088000 0xd0088000 0 0x00002000   /* port 1.2 registers */
+				  0x00004000 0 0xd008C000 0xd008C000 0 0x00002000   /* port 1.3 registers */
+				  0x81000000 0 0	  0xc0000000 0 0x00100000   /* downstream I/O */
+				  0x82000000 0 0	  0xc1000000 0 0x08000000>; /* non-prefetchable memory */
+
+			#interrupt-cells = <1>;
+			interrupt-map-mask = <0xf800 0 0 1>;
+			interrupt-map = <0x0800 0 0 1 &mpic 58
+				         0x1000 0 0 1 &mpic 59
+					 0x1800 0 0 1 &mpic 60
+					 0x2000 0 0 1 &mpic 61
+					 0x2800 0 0 1 &mpic 62
+				         0x3000 0 0 1 &mpic 63
+					 0x3800 0 0 1 &mpic 64
+					 0x4000 0 0 1 &mpic 65
+					 0x4800 0 0 1 &mpic 99
+					 0x5000 0 0 1 &mpic 103>;
+
+			pcie@0,0 {
+				device_type = "pciex";
+				reg = <0x0800 0 0xd0040000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <0>;
+				clocks = <&gateclk 5>;
+				status = "disabled";
+			};
+
+			pcie@0,1 {
+				device_type = "pciex";
+				reg = <0x1000 0 0xd0044000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <1>;
+				clocks = <&gateclk 6>;
+				status = "disabled";
+			};
+
+			pcie@0,2 {
+				device_type = "pciex";
+				reg = <0x1800 0 0xd0048000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <2>;
+				clocks = <&gateclk 7>;
+				status = "disabled";
+			};
+
+			pcie@0,3 {
+				device_type = "pciex";
+				reg = <0x2000 0 0xd004C000 0 0xC000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <0>;
+				marvell,pcie-lane = <3>;
+				clocks = <&gateclk 8>;
+				status = "disabled";
+			};
+
+			pcie@1,0 {
+				device_type = "pciex";
+				reg = <0x2800 0 0xd0080000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <1>;
+				marvell,pcie-lane = <0>;
+				clocks = <&gateclk 9>;
+				status = "disabled";
+			};
+
+			pcie@1,1 {
+				device_type = "pciex";
+				reg = <0x3000 0 0xd0084000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <1>;
+				marvell,pcie-lane = <1>;
+				clocks = <&gateclk 10>;
+				status = "disabled";
+			};
+
+			pcie@1,2 {
+				device_type = "pciex";
+				reg = <0x3800 0 0xd0088000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <1>;
+				marvell,pcie-lane = <2>;
+				clocks = <&gateclk 11>;
+				status = "disabled";
+			};
+
+			pcie@1,3 {
+				device_type = "pciex";
+				reg = <0x4000 0 0xd008C000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <1>;
+				marvell,pcie-lane = <3>;
+				clocks = <&gateclk 12>;
+				status = "disabled";
+			};
+			pcie@2,0 {
+				device_type = "pciex";
+				reg = <0x4800 0 0xd0042000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <2>;
+				marvell,pcie-lane = <0>;
+				clocks = <&gateclk 26>;
+				status = "disabled";
+			};
+
+			pcie@3,0 {
+				device_type = "pciex";
+				reg = <0x5000 0 0xd0082000 0 0x2000>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				marvell,pcie-port = <3>;
+				marvell,pcie-lane = <0>;
+				clocks = <&gateclk 27>;
+				status = "disabled";
+			};
+		};
 	};
  };
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-01-28 18:56 ` [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP Thomas Petazzoni
@ 2013-02-06 22:41   ` Arnd Bergmann
  2013-02-06 23:07     ` Thomas Petazzoni
  2013-02-07  1:05     ` Jason Gunthorpe
  0 siblings, 2 replies; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-06 22:41 UTC (permalink / raw)
  To: linux-arm-kernel
On Monday 28 January 2013, Thomas Petazzoni wrote:
> +
> +               /*
> +                * MV78230 has 2 PCIe units Gen2.0: One unit can be
> +                * configured as x4 or quad x1 lanes. One unit is
> +                * x4/x1.
> +                */
> +               pcie-controller {
> +                       compatible = "marvell,armada-370-xp-pcie";
> +                       status = "disabled";
> +
> +                       #address-cells = <3>;
> +                       #size-cells = <2>;
> +
> +
> +                       bus-range = <0x00 0xff>;
> +
> +                       ranges = <0x00000800 0 0xd0040000 0xd0040000 0 0x00002000   /* port 0.0 registers */
> +                                 0x00004800 0 0xd0042000 0xd0042000 0 0x00002000   /* port 2.0 registers */
> +                                 0x00001000 0 0xd0044000 0xd0044000 0 0x00002000   /* port 0.1 registers */
> +                                 0x00001800 0 0xd0048000 0xd0048000 0 0x00002000   /* port 0.2 registers */
> +                                 0x00002000 0 0xd004C000 0xd004C000 0 0x00002000   /* port 0.3 registers */
> +                                 0x81000000 0 0          0xc0000000 0 0x00010000   /* downstream I/O */
> +                                 0x82000000 0 0          0xc1000000 0 0x08000000>; /* non-prefetchable memory */
> +
I've been thinking some more about this, and I wonder if it would
make more sense to describe the address remapping correctly as
a node on top of the pcie-controller node.
This would mean that rather than putting the mapped physical address
(0xc0000000, 0xc1000000, ...) in here, you would actually have 64-bit
address as the destination as well, in whatever format the
address map hardware uses, I assume using a numbered 32 bit
address space for each object that can be remapped.
This would also let you do the PCI memory address assignment for
each port separately, starting at bus address 0, followed by
finding a location in the CPU address space and passing
the start as the sys->mem_offset argument to
pci_add_resource_offset.
> +                       pcie@0,0 {
> +                               device_type = "pciex";
> +                               reg = <0x0800 0 0xd0040000 0 0x2000>;
> +                               #address-cells = <3>;
> +                               #size-cells = <2>;
> +                               marvell,pcie-port = <0>;
> +                               marvell,pcie-lane = <0>;
> +                               interrupts = <1>;
> +                               clocks = <&gateclk 5>;
> +                               status = "disabled";
> +                       };
I think you are missing a "ranges" property here, at least an empty
one, which is required by the standard but not currently enforced
in the code.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-02-06 22:41   ` Arnd Bergmann
@ 2013-02-06 23:07     ` Thomas Petazzoni
  2013-02-07  8:04       ` Arnd Bergmann
  2013-02-07  1:05     ` Jason Gunthorpe
  1 sibling, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-06 23:07 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Arnd Bergmann,
On Wed, 6 Feb 2013 22:41:14 +0000, Arnd Bergmann wrote:
> I've been thinking some more about this, and I wonder if it would
> make more sense to describe the address remapping correctly as
> a node on top of the pcie-controller node.
> 
> This would mean that rather than putting the mapped physical address
> (0xc0000000, 0xc1000000, ...) in here, you would actually have 64-bit
> address as the destination as well, in whatever format the
> address map hardware uses, I assume using a numbered 32 bit
> address space for each object that can be remapped.
> 
> This would also let you do the PCI memory address assignment for
> each port separately, starting at bus address 0, followed by
> finding a location in the CPU address space and passing
> the start as the sys->mem_offset argument to
> pci_add_resource_offset.
Hum, could you give a skeleton example? I'm not sure I
understand your suggestion.
> 
> > +                       pcie@0,0 {
> > +                               device_type = "pciex";
> > +                               reg = <0x0800 0 0xd0040000 0
> > 0x2000>;
> > +                               #address-cells = <3>;
> > +                               #size-cells = <2>;
> > +                               marvell,pcie-port = <0>;
> > +                               marvell,pcie-lane = <0>;
> > +                               interrupts = <1>;
> > +                               clocks = <&gateclk 5>;
> > +                               status = "disabled";
> > +                       };
> 
> I think you are missing a "ranges" property here, at least an empty
> one, which is required by the standard but not currently enforced
> in the code.
Is it really wise to have DT properties that are not used by anything,
and therefore have a very high chance of either being incorrect, or
becoming incorrect?
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-02-06 23:07     ` Thomas Petazzoni
@ 2013-02-07  8:04       ` Arnd Bergmann
  2013-02-07  8:45         ` Thomas Petazzoni
  0 siblings, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-07  8:04 UTC (permalink / raw)
  To: linux-arm-kernel
On Wednesday 06 February 2013, Thomas Petazzoni wrote:
> Dear Arnd Bergmann,
> 
> On Wed, 6 Feb 2013 22:41:14 +0000, Arnd Bergmann wrote:
> 
> > I've been thinking some more about this, and I wonder if it would
> > make more sense to describe the address remapping correctly as
> > a node on top of the pcie-controller node.
> > 
> > This would mean that rather than putting the mapped physical address
> > (0xc0000000, 0xc1000000, ...) in here, you would actually have 64-bit
> > address as the destination as well, in whatever format the
> > address map hardware uses, I assume using a numbered 32 bit
> > address space for each object that can be remapped.
> > 
> > This would also let you do the PCI memory address assignment for
> > each port separately, starting at bus address 0, followed by
> > finding a location in the CPU address space and passing
> > the start as the sys->mem_offset argument to
> > pci_add_resource_offset.
> 
> Hum, good you give a skeleton example, because I'm not sure to
> understand your suggestion.
I mean (roughly, since I don't know how that hardware defines it)
/ {
	#address-cells = <1>;
	#size-cells = <1>;
	memory@0 {
		/* this node can not get remapped */
		reg = <0x0 0x40000000>;
	};
	address-map {
		/* this device translates 64 bit MMIO bus addresses into 32 bit CPU addresses */
		compatible = "marvell,armada-addr-decoding-controller";
		reg = <0xd0020000 0x258>;
		#address-cells = <2>;
		#size-cells = <1>;
		/* each remapped window has one entry here */
		ranges = <0xa 0 0xc0000000 0x10000>,     /* map window a to 0xc0000000 */
		         <0xb 0 0xc1000000 0x08000000>,  /* map window b to 0xc1000000 */
			 <...>; /* more windows */
		pciex {
			#address-cells = <3>;
			#size-cells = <2>;
			ranges = <0x81000000 0 0 0xa 0 0 0x00010000,  /* I/O is window a */
				  0x82000000 0 0 0xb 0 0 0x08000000>; /* non-prefetchable memory */
			...
		};
		something-else {
			...
			reg = <0xc 0 0x10000>; /* window c */
		};
	};
};
> > 
> > > +                       pcie@0,0 {
> > > +                               device_type = "pciex";
> > > +                               reg = <0x0800 0 0xd0040000 0
> > > 0x2000>;
> > > +                               #address-cells = <3>;
> > > +                               #size-cells = <2>;
> > > +                               marvell,pcie-port = <0>;
> > > +                               marvell,pcie-lane = <0>;
> > > +                               interrupts = <1>;
> > > +                               clocks = <&gateclk 5>;
> > > +                               status = "disabled";
> > > +                       };
> > 
> > I think you are missing a "ranges" property here, at least an empty
> > one, which is required by the standard but not currently enforced
> > in the code.
> 
> Is it really wise to have DT properties that are not used by anything,
> and therefore have a very high chance of either being incorrect, or
> becoming incorrect?
In this case, definitely. It's mandated by an IEEE standard and the only reason
why we let some mistakes slip here is that some ancient PowerMac systems
get it wrong in their firmware. There are lots of cases where the difference
between an empty "ranges" property and an absent one is very significant
and I would really prefer to change the kernel to be standard compliant
here.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-02-07  8:04       ` Arnd Bergmann
@ 2013-02-07  8:45         ` Thomas Petazzoni
  2013-02-07  9:09           ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Thomas Petazzoni @ 2013-02-07  8:45 UTC (permalink / raw)
  To: linux-arm-kernel
Dear Arnd Bergmann,
On Thu, 7 Feb 2013 08:04:12 +0000, Arnd Bergmann wrote:
> 	address-map {
> 		/* this device translates 64 bit MMIO bus addresses into 32 bit CPU addresses */
> 		compatible = "marvell,armada-addr-decoding-controller";
> 		reg = <0xd0020000 0x258>;
> 		#address-cells = <2>;
> 		#size-cells = <1>;
> 
> 		/* each remapped window has one entry here */
> 		ranges = <0xa 0 0xc0000000 0x10000>,     /* map window a to 0xc0000000 */
> 		         <0xb 0 0xc1000000 0x08000000>,  /* map window b to 0xc1000000 */
> 			 <...>; /* more windows */
> 
> 		pciex {
> 			#address-cells = <3>;
> 			#size-cells = <2>;
> 			ranges = <0x81000000 0 0 0xa 0 0 0x00010000,  /* I/O is window a */
> 				  0x82000000 0 0 0xb 0 0 0x08000000>; /* non-prefetchable memory */
> 			...
> 		};
> 
> 
> 		something-else {
> 			...
> 			reg = <0xc 0 0x10000>; /* window c */
> 		};
> 	};
> };
Unfortunately, this means the windows are statically defined in the DT,
which is simply not possible for PCIe, as we have already explained
several times in this thread.
Any solution where the PCIe windows are statically described is
simply /not/ acceptable.
We have 10 PCIe interfaces, each requiring up to two windows, and we
have on the system a *total* of 20 windows. Doing static assignments of
windows is simply not an option.
Of course, you'll tell me that it's up to each board .dts to have a
number of windows that matches the number of actually existing PCIe
interfaces. But it means that each and every developer adding the
support for a new board must understand this complex problem, which is
something we do not want. We have a solution that makes all of this
PCIe window assignment dynamic, so it surprises me that we have to
continue to explain why a static solution is not appropriate.
Best regards,
Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-02-07  8:45         ` Thomas Petazzoni
@ 2013-02-07  9:09           ` Arnd Bergmann
  0 siblings, 0 replies; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-07  9:09 UTC (permalink / raw)
  To: linux-arm-kernel
On Thursday 07 February 2013, Thomas Petazzoni wrote:
> Unfortunately, this means the windows are statically defined in the DT,
> which is simply not possible for PCIe, as we have already explained
> several times in this thread.
> 
> Any solution where the PCIe windows are statically described is
> simply not acceptable.
> 
> We have 10 PCIe interfaces, each requiring up to two windows, and we
> have on the system a total of 20 windows. Doing static assignments of
> windows is simply not an option.
> 
> Of course, you'll tell me that it's up to each board .dts to have a
> number of windows that matches the number of actually existing PCIe
> interface. But it means that each and every developer adding the
> support for a new board must understand this complex problem, which is
> something we do not want. We have a solution that makes all of this
> PCIe window assignment dynamic, so it surprises me that we have to
> continue to explain why a static solution is not appropriate.
No, the idea here was actually to leave out any of the dynamic mappings
from the device tree and do the PCI bus probe for each port based
on a local bus address starting at 0 for each port. After all
BARs are assigned, you can then map each port to a convenient physical
address and register it by passing the start offset so that the
pci resources are adapted correctly.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-02-06 22:41   ` Arnd Bergmann
  2013-02-06 23:07     ` Thomas Petazzoni
@ 2013-02-07  1:05     ` Jason Gunthorpe
  2013-02-07  7:28       ` Thierry Reding
  2013-02-07  8:24       ` Arnd Bergmann
  1 sibling, 2 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-07  1:05 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Feb 06, 2013 at 10:41:14PM +0000, Arnd Bergmann wrote:
> This would mean that rather than putting the mapped physical address
> (0xc0000000, 0xc1000000, ...) in here, you would actually have 64-bit
> address as the destination as well, in whatever format the
> address map hardware uses, I assume using a numbered 32 bit
> address space for each object that can be remapped.
Do you mean like:
mbus_matrix {
   /* At this level addresses are
       <32 bit MBUS target ID> <32 bit target address offset>
      target_id is an internal MBUS specification
     */
   #address-cells = <2>;
   #size-cells = <1>;
   pex0 {
       ranges = <
      // CPU window bridged to PCI MMIO
      0x82000000 0x00000000 0x00000000  0x<target_id> 0x0 0x0 0x8000000
      // CPU IO window bridged to PCI IO
      0x81000000 0x00000000 0x00000000  0x<target_id> 0x0  0x0 0xa0000
      ...>;
   }
   nand {
       reg = <0x<target_id> 0 0x200>;
   };
   internal_regs {
       ranges = <0 0x<target_id> 0 0x10000>;
       timer {
           reg = <0x20300 0x20>;
      };
   };
}
?
I think the big issue would go back to how to pool all the link
decoders together in a way that fits OF and Linux's PCI core will
understand?
How does of_translate_address work in a world like this?
> This would also let you do the PCI memory address assignment for
> each port separately, starting at bus address 0, followed by
> finding a location in the CPU address space and passing
> the start as the sys->mem_offset argument to
> pci_add_resource_offset.
That goes back to the original problem - the goal is to have only one
pci_sys_data, not one for every link.
The host driver would have to request a large region of physical
address space and still dole it out on a link by link basis. Not sure
how to model that in DT??
In any event, changing how all the dynamic windows are configured in
DT is a big job (there was another thread about this) it seems
orthogonal to the PCI host driver..
> > +                       pcie at 0,0 {
> > +                               device_type = "pciex";
> > +                               reg = <0x0800 0 0xd0040000 0 0x2000>;
> > +                               #address-cells = <3>;
> > +                               #size-cells = <2>;
> > +                               marvell,pcie-port = <0>;
> > +                               marvell,pcie-lane = <0>;
> > +                               interrupts = <1>;
> > +                               clocks = <&gateclk 5>;
> > +                               status = "disabled";
> > +                       };
> 
> I think you are missing a "ranges" property here, at least an empty
> one, which is required by the standard but not currently enforced
> in the code.
Maybe.. according to the standard the ranges in this stanza should
reflect the bridge configuration, but that isn't known when the DT is
written. An empty ranges means identity and that isn't really right
either.
Also, what should 'reg' be so that the PCI core binds the OF nodes
properly?  The standard says reg should have the configuration space
address of the bridge, and I noticed Thierry was using something that
almost looked like a config space address in his driver..
But that seems overly tricky.. When using the link stanzas, shouldn't
this scheme be more like this:
// The PCI-E host bridge
pex@0 {
   device_type = "pciex"; // <<-- Important!!
   ranges = <
      // Driver internal control registers in MMIO space
      0x82000000 0x10000000 0xd0040000  0xd0040000  0x0 0x8000000
      // CPU 0xe0000000 -> e8000000 bridged to PCI MMIO
      0x82000000 0x00000000 0xe0000000  0xe0000000  0x0 0x8000000
      // CPU IO bus 0x0000000 -> 0xa0000 bridged to PCI IO
      0x81000000 0x00000000 0x00000000  0x00000000  0x0 0xa0000
      // CPU 0xe8000000 -> f0000000 bridged to PCI MMIO, prefetchable
      0xC2000000 0x00000000 0xe8000000  0xe8000000  0x0 0x8000000
      >;
   //...
   // PCI-PCI bridge to Physical link 0
   pcie@0,0 {
     device_type = "pciex";
     // The configuration space address of the PCI-PCI bridge required by OF
     reg = <0x80 0 0  0 0>; // Bus 0, Dev 0x10, Fn 0
     /* The 'bar' on the PCI-PCI bridge, maps to internal control
        registers, required by the driver. */
     assigned-addresses = <0x82000000 0x10000000 0xd0040000  0 0x2000>;
     // ..
  }
}
Thierry, what do you think?
I struggled with this particular area of the OF system recently and
Grant Likely was very helpful - the above is based on that
discussion..
======
Thomas, here is one possible alternate idea, not sure on the merit,
just including it because it is close to what I've got in my DT right
now..:
// The entire multi-lane controller and PCI root bus
pex@0 {
   device_type = "pciex";
   /* This section configures the Linux PCI host driver. Each line is a
      physical PCI-Link. (Errors included :) */
   compatible = "marvell,armada-370-xp-pcie";
   regs = <0xd0040000 0x00002000   // port 0.0
           0xd0042000 0x00002000   // port 2.0
           0xd0044000 0x00002000   // port 0.1
           0xd0048000 0x00002000   // port 0.2
           0xd004C000 0x00002000>; // port 0.3
   io-cpu-window = <0xc0000000 0xa0000>;
   interrupts = <58 59 60 61 99>;
   clocks = <&gateclk 5
             &gateclk 6
             &gateclk 7
             &gateclk 8
             &gateclk 26>;
   marvell,port-lane = <0 0
                        0 1
                        0 2
                        0 3
                        2 0>;
   /* Below here configures the aggregate PCI bus the PEX0 stanza
      describes */
   #address-cells = <3>;
   #size-cells = <2>;
   // CPU resources allocated to this PCI host bridge
   bus-range = <0x00 0xff>;
   ranges = <
      // CPU 0xe0000000 -> e8000000 bridged to PCI MMIO
      0x82000000 0x00000000 0xe0000000  0xe0000000  0x0 0x8000000
      // CPU IO bus 0x0000000 -> 0xa0000 bridged to PCI IO
      0x81000000 0x00000000 0x00000000  0x00000000  0x0 0xa0000
      // CPU 0xe8000000 -> f0000000 bridged to PCI MMIO, prefetchable
      0xC2000000 0x00000000 0xe8000000  0xe8000000  0x0 0x8000000
      >;
   /* Any PCI devices associated with this bus go here, relative to
      the above ranges. As example: */
   link@0 {
   	device_type = "pciex";
        // Root port at bus 0, device 0x10, function 0
	reg = <0x00000080 0 0  0 0>;
       ep {
   	 device_type = "pciex";
         // End port on bus 1, device 0, function 0
	 reg = <0x00000100 0 0  0 0>;
       } 
   }
}
Thoughts:
 - 'regs' in the main stanza, keep to CPU addresses instead of
   confusing translated fake ranges address
 - each regs line is matched to an interrupt, clock and port-lane
   line to describe a link. The above describes 5 links.
 - The CPU physical address window to use for the IO space
   is set via io-cpu-window, not much choice here, the PCI
   format ranges must be 0 based.
 - It is not necessary to have per-root port stanzas at all
   with the above.
 - There is only one PCI bus stanza, the top level.
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-02-07  1:05     ` Jason Gunthorpe
@ 2013-02-07  7:28       ` Thierry Reding
  2013-02-07 17:49         ` Jason Gunthorpe
  2013-02-07  8:24       ` Arnd Bergmann
  1 sibling, 1 reply; 216+ messages in thread
From: Thierry Reding @ 2013-02-07  7:28 UTC (permalink / raw)
  To: linux-arm-kernel
On Wed, Feb 06, 2013 at 06:05:02PM -0700, Jason Gunthorpe wrote:
[...]
> Also, what should 'reg' be so that the PCI core binds the OF nodes
> properly?  The standard says reg should have the configuration space
> address of the bridge, and I noticed Thierry was using something that
> almost looked like a config space address in his driver..
> 
> But that seems overly tricky.. When using the link stanzas, shouldn't
> this scheme be more like this:
> 
> // The PCI-E host bridge
> pex at 0 {
>    device_type = "pciex"; // <<-- Important!!
That might actually work but is somewhat dangerous. I originally tried
to trick the OF code into parsing the ranges properly by setting this to
"pci". However that breaks horribly because of_bus_pci_match() in
drivers/of/address.c will cause the parent bus of the pex@0 controller
to be PCI, which will cause #address-cells == 3 and #size-cells == 2,
and thus mess up the address translation because you actually have
#address-cells == 1 and #size-cells == 1.
The code doesn't seem to match on "pciex", so you might get away with
it, but I don't see why exactly it would be necessary. For one it is
actually wrong as the device isn't a PCI device but a platform device.
What I did to solve the ranges parsing problem is to use of_find_bus()
instead of of_match_bus() in of_pci_process_ranges(), so that the cell
count is correct.
>    ranges = <
>       // Driver internal control registers in MMIO space
>       0x82000000 0x10000000 0xd0040000  0xd0040000  0x0 0x8000000
I'm not sure I understand what you're doing here. Where does the
0x10000000 in cell 2 come from?
I also just noticed that I used 0x00000800 in the first cell, maybe that
should be 0x02000800, though I think that didn't quite work for some
reason. I'll need to check that. The odd part about this is that the
address is in fact not within PCI MMIO space at all, so I'm not sure
this is even a correct way to represent it. However I find it quite
intuitive to do so and it is really the only way to make the translation
from the PCI-PCI bridge's reg property work while at the same time
having a proper PCI address for the port.
Note that 0x82000800 would probably not be correct as it'd indicate that
the address is relocatable, which it really isn't.
>       // CPU 0xe0000000 -> e8000000 bridged to PCI MMIO
>       0x82000000 0x00000000 0xe0000000  0xe0000000  0x0 0x8000000
>       // CPU IO bus 0x0000000 -> 0xa0000 bridged to PCI IO
>       0x81000000 0x00000000 0x00000000  0x00000000  0x0 0xa0000
>       // CPU 0xe8000000 -> f0000000 bridged to PCI MMIO, prefetchable
>       0xC2000000 0x00000000 0xe8000000  0xe8000000  0x0 0x8000000
>       >;
These look good.
>    // PCI-PCI bridge to Physical link 0
>    pcie at 0,0 {
>      device_type = "pciex";
>      // The configuration space address of the PCI-PCI bridge required by OF
>      reg = <0x80 0 0  0 0>; // Bus 0, Dev 0x10, Fn 0
I think the first cell should be 0x800.
> 
>      /* The 'bar' on the PCI-PCI bridge, maps to internal control
>         registers, required by the driver. */
>      assigned-addresses = <0x82000000 0x10000000 0xd0040000  0 0x2000>;
>      // ..
>   }
The PCI DT binding says that each entry in the assigned-addresses
property is to correspond to one of the PCI device's base address
registers. So unless this is actually the value that ends up being
written to one of the BARs I don't think this is correct.
Thierry
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 836 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20130207/5e9430fb/attachment.sig>
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-02-07  7:28       ` Thierry Reding
@ 2013-02-07 17:49         ` Jason Gunthorpe
  0 siblings, 0 replies; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-07 17:49 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 07, 2013 at 08:28:24AM +0100, Thierry Reding wrote:
> On Wed, Feb 06, 2013 at 06:05:02PM -0700, Jason Gunthorpe wrote:
> [...]
> > Also, what should 'reg' be so that the PCI core binds the OF nodes
> > properly?  The standard says reg should have the configuration space
> > address of the bridge, and I noticed Thierry was using something that
> > almost looked like a config space address in his driver..
> > 
> > But that seems overly tricky.. When using the link stanzas, shouldn't
> > this scheme be more like this:
> > 
> > // The PCI-E host bridge
> > pex at 0 {
> >    device_type = "pciex"; // <<-- Important!!
> 
> That might actually work but is somewhat dangerous. I originally tried
> to trick the OF code into parsing the ranges properly by setting this to
> "pci". However that breaks horribly because of_bus_pci_match() in
> drivers/of/address.c will cause the parent bus of the pex@0 controller
> to be PCI, which will cause #address-cells == 3 and #size-cells == 2,
> and thus messing up the address translation because you actually have
> #address-cells == 1 and #size-cells == 1.
Oh right, yes, I meant it to be "pci", specifically to engage that
code. Ditto for the other instance.
Not sure about the side effect you are talking about, I am using that
pattern in my DT and it is OK, but the PEX node is off the root node
directly.. It shouldn't affect the parent. Other DT examples using
device_type = pci (ie in PPC) also put it on the top PCI node and have
that node buried, so it should be correct, but all the stuff below has
to be correct first..
 
> >    ranges = <
> >       // Driver internal control registers in MMIO space
> >       0x82000000 0x10000000 0xd0040000  0xd0040000  0x0 0x8000000
> 
> I'm not sure I understand what you're doing here. Where does the
> 0x10000000 in cell 2 come from?
It is just a marker to keep the internal regs distinct from the PCI
MMIO window. I don't really like it, but something is necessary to
pass the non-PCI items into the link stanza. It would probably be
better to just place them outside the link as I did in my other
example..
> I also just noticed that I used 0x00000800 in the first cell, maybe that
> should be 0x02000800, though I think that didn't quite work for some
> reason. I'll need to check that. The odd part about this is that the
I think that is the trickiness you have, 0x00000800 is probably the
correct config space address number for your root port bridge. You are
overriding it to be both a config space address and something that
translates back to a CPU MMIO address.
If you use 0x02000800 then it is no longer a config space address, it is
an MMIO address and your 'reg' won't match anymore.
> address is in fact not within PCI MMIO space at all, so I'm not sure
> this is even a correct way to represent it. However I find it quite
> intuitive to do so and it is really the only way to make the translation
> from the PCI-PCI bridge's reg property work while at the same time
> having a proper PCI address for the port.
Your problem is that in PCI devices 'reg' is not supposed to be
translated to a CPU address. It is the config space address of the PCI
device followed by the *sizes* of all the BARs.
assigned-addresses is the MMIO location (and size) of the 'BARs', and
is intended to be translated to a CPU address.
When the bus is properly in PCI mode the OF address code automatically
uses assigned-addresses instead of regs, that is why you need
device_type = pci
> >    // PCI-PCI bridge to Physical link 0
> >    pcie at 0,0 {
> >      device_type = "pciex";
> >      // The configuration space address of the PCI-PCI bridge required by OF
> >      reg = <0x80 0 0  0 0>; // Bus 0, Dev 0x10, Fn 0
> 
> I think the first cell should be 0x800.
Right.
> >      /* The 'bar' on the PCI-PCI bridge, maps to internal control
> >         registers, required by the driver. */
> >      assigned-addresses = <0x82000000 0x10000000 0xd0040000  0 0x2000>;
> >      // ..
> >   }
> 
> The PCI DT binding says that each entry in the assigned-addresses
> property is to correspond to one of the PCI device's base address
> registers. So unless this is actually the value that ends up being
> written to one of the BARs I don't think this is correct.
Yes, but, it is a compromise, of sorts. DT has no way to pass a
non-PCI described resource into this stanza. It would be ideal if the
driver didn't require that at all, but if it does I think it has to be
through assigned-addresses...
Now, perhaps this is better:
assigned-addresses = <0 0 0  0 0 // BAR 0 of the bridge, unused
                      0 0 0  0 0 // BAR 1 of the bridge, unused
                      0x82000000 0x10000000 0xd0040000  0 0x2000>; // Extended
Mark the only two BARs in the bridge as unused and put your non-BAR
registers after them.
Certainly, trying to use reg to convey that the link has an internal
MMIO region at CPU address 0xd0040000 would be even worse..
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread
 
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-02-07  1:05     ` Jason Gunthorpe
  2013-02-07  7:28       ` Thierry Reding
@ 2013-02-07  8:24       ` Arnd Bergmann
  2013-02-07 17:00         ` Jason Gunthorpe
  1 sibling, 1 reply; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-07  8:24 UTC (permalink / raw)
  To: linux-arm-kernel
On Thursday 07 February 2013, Jason Gunthorpe wrote:
> On Wed, Feb 06, 2013 at 10:41:14PM +0000, Arnd Bergmann wrote:
> 
> > This would mean that rather than putting the mapped physical address
> > (0xc0000000, 0xc1000000, ...) in here, you would actually have 64-bit
> > address as the destination as well, in whatever format the
> > address map hardware uses, I assume using a numbered 32 bit
> > address space for each object that can be remapped.
> 
> Do you mean like:
> 
> mbus_matrix {
>    /* At this level addresses are
>        <32 bit MBUS target ID> <32 bit target address offset>
>       target_id is an internal MBUS specification
>      */
>    #address-cells = <2>;
>    #size-cells = <1>;
> 
>    pex0 {
>        ranges = <
>       // CPU window bridged to PCI MMIO
>       0x82000000 0x00000000 0x00000000  0x<target_id> 0x0 0x0 0x8000000
>       // CPU IO window bridged to PCI IO
>       0x81000000 0x00000000 0x00000000  0x<target_id> 0x0  0x0 0xa0000
>       ...>;
>    }
> 
>    nand {
>        reg = <0x<target_id> 0 0x200>;
>    };
> 
>    internal_regs {
>        ranges = <0 0x<target_id> 0 0x10000>;
> 
>        timer {
>            reg = <0x20300 0x20>;
>       };
>    };
> }
> 
> ?
Yes, but the mbus-matrix node in the example would need a ranges property
to map the addresses according to how the windows are set up.
> I think the big issue would go back to how to pool all the link
> decoders together in a way that fits OF and Linux's PCI core will
> understand?
> 
> How does of_translate_address work in a world like this?
The trick is that the root node must still be #address-cells=<1> and
refer to the CPU's translated view of the world, rather than the raw
view with individual windows. When we set up the address map in
Linux, we would initially read the windows from the ranges property,
but if we make changes to the windows, that property has to be
adapted on the fly. of_translate_address will then for any
device go through the intermediate 64 bit address but end up with
the correct physical address at the root node.
> > This would also let you do the PCI memory address assignment for
> > each port separately, starting at bus address 0, followed by
> > finding a location in the CPU address space and passing
> > the start as the sys->mem_offset argument to
> > pci_add_resource_offset.
> 
> That goes back to the original problem - the goal is to have only one
> pci_sys_data, not one for every link.
> 
> The host driver would have to request a large region of physical
> address space and still dole it out on a link by link basis. Not sure
> how to model that in DT??
> 
> In any event, changing how all the dynamic windows are configured in
> DT is a big job (there was another thread about this) it seems
> orthogonal to the PCI host driver..
Yes, it is orthogonal, you are right, but I think it would make it
easier to understand what we are trying to do here with the PCI
node. 
I'm not sure why you say "the goal is to have only one pci_sys_data",
as far as I'm concerned, the goal is to have a working system that
is both as sensible and as simple as possible. When in hardware
you have a bunch of independent PCIe host bridges, each with their
own address translation, that means to me that (if possible) we 
should in the device tree show multiple independent PCIe host bridges
and how they are set up with address translation, and Linux should
see multiple independent host bridges as well.
Having a single pci_sys_data is a hack to defeat some of the problems
associated with getting bus probing to work in practice, but it's
not necessarily the best solution.
> > > +                       pcie at 0,0 {
> > > +                               device_type = "pciex";
> > > +                               reg = <0x0800 0 0xd0040000 0 0x2000>;
> > > +                               #address-cells = <3>;
> > > +                               #size-cells = <2>;
> > > +                               marvell,pcie-port = <0>;
> > > +                               marvell,pcie-lane = <0>;
> > > +                               interrupts = <1>;
> > > +                               clocks = <&gateclk 5>;
> > > +                               status = "disabled";
> > > +                       };
> > 
> > I think you are missing a "ranges" property here, at least an empty
> > one, which is required by the standard but not currently enforced
> > in the code.
> 
> Maybe.. according to the standard the ranges in this stanza should
> reflect the bridge configuration, but that isn't known when the DT is
> written. An empty ranges means identity and that isn't really right
> either.
Ok, I thought it was an identity mapping here.
> Also, what should 'reg' be so that the PCI core binds the OF nodes
> properly?  The standard says reg should have the configuration space
> address of the bridge, and I noticed Thierry was using something that
> almost looked like a config space address in his driver..
Well, that assumes that a bridge is addressed using configuration space,
which IIRC is normally the case but not here.
> But that seems overly tricky.. When using the link stanzas, shouldn't
> this scheme be more like this:
> 
> // The PCI-E host bridge
> pex at 0 {
>    device_type = "pciex"; // <<-- Important!!
> 
>    ranges = <
>       // Driver internal control registers in MMIO space
>       0x82000000 0x10000000 0xd0040000  0xd0040000  0x0 0x8000000
> 
>       // CPU 0xe0000000 -> e8000000 bridged to PCI MMIO
>       0x82000000 0x00000000 0xe0000000  0xe0000000  0x0 0x8000000
>       // CPU IO bus 0x0000000 -> 0xa0000 bridged to PCI IO
>       0x81000000 0x00000000 0x00000000  0x00000000  0x0 0xa0000
>       // CPU 0xe8000000 -> f0000000 bridged to PCI MMIO, prefetchable
>       0xC2000000 0x00000000 0xe8000000  0xe8000000  0x0 0x8000000
>       >;
> 
>    //...
> 
>    // PCI-PCI bridge to Physical link 0
>    pcie at 0,0 {
>      device_type = "pciex";
>      // The configuration space address of the PCI-PCI bridge required by OF
>      reg = <0x80 0 0  0 0>; // Bus 0, Dev 0x10, Fn 0
> 
>      /* The 'bar' on the PCI-PCI bridge, maps to internal control
>         registers, required by the driver. */
>      assigned-addresses = <0x82000000 0x10000000 0xd0040000  0 0x2000>;
>      // ..
>   }
> }
> 
> Thierry, what do you think?
> 
> I struggled with this particular area of the OF system recently and
> Grant Likely was very helpful - the above is based on that
> discussion..
I never really understood the 'assigned-addresses' property, but it looks
sensible.
> ======
> 
> Thomas, here is one possible alternate idea, not sure on the merit,
> just including it because it is close to what I've got in my DT right
> now..:
> 
> // The entire multi-lane controller and PCI root bus
> pex at 0 {
>    device_type = "pciex";
> 
>    /* This section configures the Linux PCI host driver. Each line is a
>       physical PCI-Link. (Erorrs included :) */
>    compatible = "marvell,armada-370-xp-pcie";
>    regs = <0xd0040000 0x00002000   // port 0.0
>            0xd0042000 0x00002000   // port 2.0
>            0xd0044000 0x00002000   // port 0.1
>            0xd0048000 0x00002000   // port 0.2
>            0xd004C000 0x00002000>; // port 0.3
>    io-cpu-window = <0xc0000000 0xa0000>;
>    interrupts = <58 59 60 61 99>;
>    clocks = <&gateclk 5
>              &gateclk 6
>              &gateclk 7
>              &gateclk 8
>              &gateclk 26>
>    marvell,port-lane <0 0
>                       0 1
>                       0 2
>                       0 3
>                       2 0>;
> 
>    /* Below here configures the aggregate PCI bus the PEX0 stanza
>       describes */
> 
>    #address-cells = <3>;
>    #size-cells = <2>;
> 
>    // CPU resources allocated to this PCI host bridge
>    bus-range = <0x00 0xff>;
>    ranges = <
>       // CPU 0xe0000000 -> e8000000 bridged to PCI MMIO
>       0x82000000 0x00000000 0xe0000000  0xe0000000  0x0 0x8000000
>       // CPU IO bus 0x0000000 -> 0xa0000 bridged to PCI IO
>       0x81000000 0x00000000 0x00000000  0x00000000  0x0 0xa0000
>       // CPU 0xe8000000 -> f0000000 bridged to PCI MMIO, prefetchable
>       0xC2000000 0x00000000 0xe8000000  0xe8000000  0x0 0x8000000
>       >;
> 
>    /* Any PCI devices associated with this bus go here, relative to
>       the above ranges. As example: */
>    link at 0 {
>    	device_type = "pciex";
>         // Root port at bus 0, device 0x10, function 0
> 	reg = <0x00000080 0 0  0 0>;
> 
>        ep {
>    	 device_type = "pciex";
>          // End port on bus 1, device 0, function 0
> 	 reg = <0x00000100 0 0  0 0>;
>        } 
>    }
> }
> 
> Thoughts:
>  - 'regs' in the main stanza, keep to CPU addresses instead of
>    confusing translated fake ranges address
Yes, I like that. If we follow the address translation method I suggested,
this would be a 64-bit address of course, but still easier to understand
than what we have now.
>  - each regs line is matches to an interrupt, clock and port-lane
>    line to describe a link. The above describes 5 links.
nice.
>  - The CPU physical address window to use for the IO space
>    is set via io-cpu-window, not much choice here, the PCI
>    format ranges must be 0 based.
I don't think I understand this part. Why can't you put this into
ranges as before?
-       0x81000000 0x00000000 0x00000000  0x00000000  0x0 0xa0000
+	0x81000000 0x00000000 0x00000000  0xc0000000  0x0 0xa0000
>  - It is not necessary to have per-root port stanzas at all
>    with the above.
>  - There is only one PCI bus stanza, the top level.
Ok.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-02-07  8:24       ` Arnd Bergmann
@ 2013-02-07 17:00         ` Jason Gunthorpe
  2013-02-07 23:44           ` Arnd Bergmann
  0 siblings, 1 reply; 216+ messages in thread
From: Jason Gunthorpe @ 2013-02-07 17:00 UTC (permalink / raw)
  To: linux-arm-kernel
On Thu, Feb 07, 2013 at 08:24:50AM +0000, Arnd Bergmann wrote:
> Yes, but the mbus-matrix node in the example would need a ranges property
> to map the addresses according to how the windows are set up.
I'll hang on to this for next time the moving windows config to DT
discussion comes up..
> > Maybe.. according to the standard the ranges in this stanza should
> > reflect the bridge configuration, but that isn't known when the DT is
> > written. An empty ranges means identity and that isn't really right
> > either.
> 
> Ok, I thought it was an identity mapping here.
> 
> > Also, what should 'reg' be so that the PCI core binds the OF nodes
> > properly?  The standard says reg should have the configuration space
> > address of the bridge, and I noticed Thierry was using something that
> > almost looked like a config space address in his driver..
> 
> Well, that assumes that a bridge is addressed using configuration space,
> which IIRC is normally the case but not here.
With Thomas's driver each link has a PCI-PCI bridge in config space, and
'configuration space address' is that wonky format OF defines for
encoding the bus/device/function number into the 3 dword address. So
the correct thing is to put the bus/device/function of the PCI-PCI
bridge for the link in the reg value.
> I never really understood the 'assigned-addresses' property, but it looks
> sensible.
assigned-addresses does the same thing as reg in simple bus, but
handles all the wonky PCI address translation
 
> >  - The CPU physical address window to use for the IO space
> >    is set via io-cpu-window, not much choice here, the PCI
> >    format ranges must be 0 based.
> 
> I don't think I understand this part. Why can't you put this into
> ranges as before?
> 
> -       0x81000000 0x00000000 0x00000000  0x00000000  0x0 0xa0000
> +	0x81000000 0x00000000 0x00000000  0xc0000000  0x0 0xa0000
The OF PCI core translates 0x81000000 IO space addresses into a 'struct
resource' tagged with IORESOURCE_IO.
But 0xc0000000 is not an IORESOURCE_IO address, it is an
IORESOURCE_MEM address..
So, I think with the current OF code this has to be 0, otherwise your
IORESOURCE_IO's end up starting at 0xc0000000 - but maybe there is some
trickiness to work with in here? (Although none of this matters when
Linux does resource assignment, the OF translation code is never
engaged)
But I agree, 0xc0000000 seems much better...
To think about it from a different angle, what would you put in the
4th dword on x86? How do you describe the IO numberspace in DT on x86?
Jason
^ permalink raw reply	[flat|nested] 216+ messages in thread 
- * [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP
  2013-02-07 17:00         ` Jason Gunthorpe
@ 2013-02-07 23:44           ` Arnd Bergmann
  0 siblings, 0 replies; 216+ messages in thread
From: Arnd Bergmann @ 2013-02-07 23:44 UTC (permalink / raw)
  To: linux-arm-kernel
On Thursday 07 February 2013, Jason Gunthorpe wrote:
> On Thu, Feb 07, 2013 at 08:24:50AM +0000, Arnd Bergmann wrote:
> > >  - The CPU physical address window to use for the IO space
> > >    is set via io-cpu-window, not much choice here, the PCI
> > >    format ranges must be 0 based.
> > 
> > I don't think I understand this part. Why can't you put this into
> > ranges as before?
> > 
> > -       0x81000000 0x00000000 0x00000000  0x00000000  0x0 0xa0000
> > +	0x81000000 0x00000000 0x00000000  0xc0000000  0x0 0xa0000
> 
> The OF PCI core translates 0x81000000 IO space addresses into a 'struct
> resource' tagged with IORESOURCE_IO.
> 
> But 0xc0000000 is not an IORESOURCE_IO address, it is an
> IORESOURCE_MEM address..
> 
> So, I think with the current OF code this has to be 0, otherwise your
> IORESOURCE_IO's end up starting at 0xc0000000 - but maybe there is some
> trickiness to work with in here? (Although none of this matters when
> Linux does resource assignment, the OF translation code is never
> engaged)
Yes, I think this is for historic reasons: the PCI binding far
predates the Linux implementation, and I'm sure on MacOS, AIX
and Solaris the PCI drivers did not actually have the same kind
of wrappers for PIO functions that we have on Linux because of
the x86 legacy.
> But I agree, 0xc0000000 seems much better...
> 
> To think about it from a different angle, what would you put in the
> 4th dword on x86? How do you describe the IO numberspace in DT on x86?
I think the best option is to have no translation at all on x86,
leaving 0x81000000 out of the ranges property. I'm not sure if
the authors of the binding actually considered that case though.
	Arnd
^ permalink raw reply	[flat|nested] 216+ messages in thread 
 
 
 
 
 
- * [PATCH v2 23/27] arm: mvebu: PCIe Device Tree informations for OpenBlocks AX3-4
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (21 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 22/27] arm: mvebu: add PCIe Device Tree informations for Armada XP Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 24/27] arm: mvebu: PCIe Device Tree informations for Armada XP DB Thomas Petazzoni
                   ` (3 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The PlatHome OpenBlocks AX3-4 has an internal mini-PCIe slot that can
be used to plug in mini-PCIe devices. We therefore enable the PCIe
interface that corresponds to this slot.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts |    7 +++++++
 1 file changed, 7 insertions(+)
diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
index b42652f..67fcaaa 100644
--- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
+++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts
@@ -121,5 +121,12 @@
 			nr-ports = <2>;
 			status = "okay";
 		};
+		pcie-controller {
+			status = "okay";
+			/* Internal mini-PCIe connector */
+			pcie at 0,0 {
+				status = "okay";
+			};
+		};
 	};
 };
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 24/27] arm: mvebu: PCIe Device Tree informations for Armada XP DB
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (22 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 23/27] arm: mvebu: PCIe Device Tree informations for OpenBlocks AX3-4 Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 25/27] arm: mvebu: PCIe Device Tree informations for Armada 370 Mirabox Thomas Petazzoni
                   ` (2 subsequent siblings)
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The Marvell evaluation board (DB) for the Armada XP SoC has 6
physical full-size PCIe slots, so we enable the corresponding PCIe
interfaces in the Device Tree.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/boot/dts/armada-xp-db.dts |   27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts
index 8e53b25..7dcc36c 100644
--- a/arch/arm/boot/dts/armada-xp-db.dts
+++ b/arch/arm/boot/dts/armada-xp-db.dts
@@ -90,5 +90,32 @@
 			phy = <&phy3>;
 			phy-mode = "sgmii";
 		};
+
+		pcie-controller {
+			status = "okay";
+
+			/*
+			 * All 6 slots are physically present as
+			 * standard PCIe slots on the board.
+			 */
+			pcie at 0,0 {
+				status = "okay";
+			};
+			pcie at 0,1 {
+				status = "okay";
+			};
+			pcie at 0,2 {
+				status = "okay";
+			};
+			pcie at 0,3 {
+				status = "okay";
+			};
+			pcie at 2,0 {
+				status = "okay";
+			};
+			pcie at 3,0 {
+				status = "okay";
+			};
+		};
 	};
 };
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 25/27] arm: mvebu: PCIe Device Tree informations for Armada 370 Mirabox
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (23 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 24/27] arm: mvebu: PCIe Device Tree informations for Armada XP DB Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 26/27] arm: mvebu: PCIe Device Tree informations for Armada 370 DB Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 27/27] arm: mvebu: update defconfig with PCI and USB support Thomas Petazzoni
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The Globalscale Mirabox platform uses one PCIe interface for an
available mini-PCIe slot, and the other PCIe interface for an internal
USB 3.0 controller. We add the necessary Device Tree information to
enable those two interfaces.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/boot/dts/armada-370-mirabox.dts |   14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/arch/arm/boot/dts/armada-370-mirabox.dts b/arch/arm/boot/dts/armada-370-mirabox.dts
index 3b40713..591068a 100644
--- a/arch/arm/boot/dts/armada-370-mirabox.dts
+++ b/arch/arm/boot/dts/armada-370-mirabox.dts
@@ -52,5 +52,19 @@
 			phy = <&phy1>;
 			phy-mode = "rgmii-id";
 		};
+
+		pcie-controller {
+			status = "okay";
+
+			/* Internal mini-PCIe connector */
+			pcie at 0,0 {
+				status = "okay";
+			};
+
+			/* Connected on the PCB to a USB 3.0 XHCI controller */
+			pcie at 1,0 {
+				status = "okay";
+			};
+		};
 	};
 };
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 26/27] arm: mvebu: PCIe Device Tree informations for Armada 370 DB
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (24 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 25/27] arm: mvebu: PCIe Device Tree informations for Armada 370 Mirabox Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  2013-01-28 18:56 ` [PATCH v2 27/27] arm: mvebu: update defconfig with PCI and USB support Thomas Petazzoni
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
The Marvell evaluation board (DB) for the Armada 370 SoC has 2
physical full-size PCIe slots, so we enable the corresponding PCIe
interfaces in the Device Tree.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/boot/dts/armada-370-db.dts |   15 +++++++++++++++
 1 file changed, 15 insertions(+)
diff --git a/arch/arm/boot/dts/armada-370-db.dts b/arch/arm/boot/dts/armada-370-db.dts
index 9b82fac..fba3e8e 100644
--- a/arch/arm/boot/dts/armada-370-db.dts
+++ b/arch/arm/boot/dts/armada-370-db.dts
@@ -59,5 +59,20 @@
 			phy = <&phy1>;
 			phy-mode = "rgmii-id";
 		};
+
+		pcie-controller {
+			status = "okay";
+			/*
+			 * The two PCIe units are accessible through
+			 * both standard PCIe slots and mini-PCIe
+			 * slots on the board.
+			 */
+			pcie at 0,0 {
+				status = "okay";
+			};
+			pcie at 1,0 {
+				status = "okay";
+			};
+		};
 	};
 };
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread
- * [PATCH v2 27/27] arm: mvebu: update defconfig with PCI and USB support
  2013-01-28 18:56 [PATCH v2] PCIe support for the Armada 370 and Armada XP SoCs Thomas Petazzoni
                   ` (25 preceding siblings ...)
  2013-01-28 18:56 ` [PATCH v2 26/27] arm: mvebu: PCIe Device Tree informations for Armada 370 DB Thomas Petazzoni
@ 2013-01-28 18:56 ` Thomas Petazzoni
  26 siblings, 0 replies; 216+ messages in thread
From: Thomas Petazzoni @ 2013-01-28 18:56 UTC (permalink / raw)
  To: linux-arm-kernel
Now that we have the necessary drivers and Device Tree information to
support PCIe on Armada 370 and Armada XP, enable the CONFIG_PCI
option.
Also, since the Armada 370 Mirabox has a built-in USB XHCI controller
connected on the PCIe bus, enable the corresponding options as well.
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
---
 arch/arm/configs/mvebu_defconfig |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm/configs/mvebu_defconfig b/arch/arm/configs/mvebu_defconfig
index b5bc96c..68ef50b 100644
--- a/arch/arm/configs/mvebu_defconfig
+++ b/arch/arm/configs/mvebu_defconfig
@@ -13,6 +13,7 @@ CONFIG_MACH_ARMADA_370=y
 CONFIG_MACH_ARMADA_XP=y
 # CONFIG_CACHE_L2X0 is not set
 # CONFIG_SWP_EMULATE is not set
+CONFIG_PCI=y
 CONFIG_SMP=y
 # CONFIG_LOCAL_TIMERS is not set
 CONFIG_AEABI=y
@@ -36,7 +37,8 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
-# CONFIG_USB_SUPPORT is not set
+CONFIG_USB=y
+CONFIG_USB_XHCI_HCD=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_S35390A=y
 CONFIG_DMADEVICES=y
-- 
1.7.9.5
^ permalink raw reply related	[flat|nested] 216+ messages in thread