LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/3] powerpc/e500: add paravirt QEMU platform
From: Scott Wood @ 2012-06-27 23:50 UTC (permalink / raw)
  To: galak; +Cc: linuxppc-dev, agraf, Jia Hongtao
In-Reply-To: <20120627234851.GA9071@tyr.buserror.net>

This gives the kernel a paravirtualized machine to target, without
requiring both sides to pretend to be targeting a specific board
that likely has little to do with the host in KVM scenarios.  This
avoids the need to add new boards to QEMU just to be able to
run KVM on new CPUs.

As this is the first platform that can run with either e500v2 or
e500mc, CONFIG_PPC_E500MC is now a legitimately user configurable
option, so add a help text.

Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/platforms/85xx/Kconfig     |   16 +++++++
 arch/powerpc/platforms/85xx/Makefile    |    1 +
 arch/powerpc/platforms/85xx/qemu_e500.c |   66 +++++++++++++++++++++++++++++++
 arch/powerpc/platforms/Kconfig.cputype  |    4 ++
 4 files changed, 87 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/platforms/85xx/qemu_e500.c

diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index f000d81..7bbebe5 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -263,6 +263,22 @@ config P5020_DS
 	help
 	  This option enables support for the P5020 DS board
 
+config PPC_QEMU_E500
+	bool "QEMU generic e500 platform"
+	depends on EXPERIMENTAL
+	select DEFAULT_UIMAGE
+	help
+	  This option enables support for running as a QEMU guest using
+	  QEMU's generic e500 machine.  This is not required if you're
+	  using a QEMU machine that targets a specific board, such as
+	  mpc8544ds.
+
+	  Unlike most e500 boards that target a specific CPU, this
+	  platform works with any e500-family CPU that QEMU supports.
+	  Thus, you'll need to make sure CONFIG_PPC_E500MC is set or
+	  unset based on the emulated CPU (or actual host CPU in the case
+	  of KVM).
+
 endif # FSL_SOC_BOOKE
 
 config TQM85xx
diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index 2125d4c..f841ac8 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -28,3 +28,4 @@ obj-$(CONFIG_SOCRATES)    += socrates.o socrates_fpga_pic.o
 obj-$(CONFIG_KSI8560)	  += ksi8560.o
 obj-$(CONFIG_XES_MPC85xx) += xes_mpc85xx.o
 obj-$(CONFIG_GE_IMP3A)	  += ge_imp3a.o
+obj-$(CONFIG_PPC_QEMU_E500) += qemu_e500.o
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
new file mode 100644
index 0000000..77c8d5d
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -0,0 +1,66 @@
+/*
+ * Paravirt target for a generic QEMU e500 machine
+ *
+ * Copyright 2012 Freescale Semiconductor Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/of_fdt.h>
+#include <asm/machdep.h>
+#include <asm/time.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+#include "smp.h"
+#include "mpc85xx.h"
+
+void __init qemu_e500_pic_init(void)
+{
+	struct mpic *mpic;
+
+	mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+			0, 256, " OpenPIC  ");
+
+	BUG_ON(mpic == NULL);
+	mpic_init(mpic);
+}
+
+static void __init qemu_e500_setup_arch(void)
+{
+	ppc_md.progress("qemu_e500_setup_arch()", 0);
+
+	fsl_pci_init();
+	mpc85xx_smp_init();
+}
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+static int __init qemu_e500_probe(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+
+	return !!of_flat_dt_is_compatible(root, "fsl,qemu-e500");
+}
+
+machine_device_initcall(qemu_e500, mpc85xx_common_publish_devices);
+
+define_machine(qemu_e500) {
+	.name			= "QEMU e500",
+	.probe			= qemu_e500_probe,
+	.setup_arch		= qemu_e500_setup_arch,
+	.init_IRQ		= qemu_e500_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
+	.get_irq		= mpic_get_irq,
+	.restart		= fsl_rstcr_restart,
+	.calibrate_decr		= generic_calibrate_decr,
+	.progress		= udbg_progress,
+};
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 61c9550..30fd01d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -159,6 +159,10 @@ config PPC_E500MC
 	bool "e500mc Support"
 	select PPC_FPU
 	depends on E500
+	help
+	  This must be enabled for running on e500mc (and derivatives
+	  such as e5500/e6500), and must be disabled for running on
+	  e500v1 or e500v2.
 
 config PPC_FPU
 	bool
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 1/3] powerpc/fsl-pci: provide common PCI init
From: Scott Wood @ 2012-06-27 23:50 UTC (permalink / raw)
  To: galak; +Cc: linuxppc-dev, agraf, Jia Hongtao
In-Reply-To: <20120627234851.GA9071@tyr.buserror.net>

As an alternative incremental starting point to Jia Hongtao's patchset,
get the FSL PCI init out of the board files, but do not yet convert to a
platform driver.

Rather than having each board supply a magic register offset for
determining the "primary" bus, we look for which PCI host bridge
contains an ISA node within its subtree.  If there is no ISA node,
normally that would mean there is no primary bus, but until certain
bugs are fixed we arbitrarily designate a primary in this case.

Conversion to a platform driver and related improvements can happen
after this, as the ordering issues are sorted out.

Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/sysdev/fsl_pci.c |   66 +++++++++++++++++++++++++++++++++++++++++
 arch/powerpc/sysdev/fsl_pci.h |    8 +++++
 2 files changed, 74 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 6073288..f61e407 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -807,3 +807,69 @@ u64 fsl_pci_immrbar_base(struct pci_controller *hose)
 
 	return 0;
 }
+
+#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx)
+static const struct of_device_id pci_ids[] = {
+	{ .compatible = "fsl,mpc8540-pci", },
+	{ .compatible = "fsl,mpc8548-pcie", },
+	{ .compatible = "fsl,mpc8610-pci", },
+	{ .compatible = "fsl,mpc8641-pcie", },
+	{ .compatible = "fsl,p1022-pcie", },
+	{ .compatible = "fsl,p1010-pcie", },
+	{ .compatible = "fsl,p1023-pcie", },
+	{ .compatible = "fsl,p4080-pcie", },
+	{ .compatible = "fsl,qoriq-pcie-v2.3", },
+	{ .compatible = "fsl,qoriq-pcie-v2.2", },
+	{},
+};
+
+struct device_node *fsl_pci_primary;
+
+void __devinit fsl_pci_init(void)
+{
+	struct device_node *node;
+	struct pci_controller *hose;
+	dma_addr_t max = 0xffffffff;
+
+	/* If a PCI host bridge has an ISA node under it, it's primary. */
+	node = of_find_node_by_type(NULL, "isa");
+	while ((fsl_pci_primary = of_get_parent(node))) {
+		of_node_put(node);
+		node = fsl_pci_primary;
+
+		if (of_match_node(pci_ids, node))
+			break;
+	}
+
+	node = NULL;
+	for_each_node_by_type(node, "pci") {
+		if (of_match_node(pci_ids, node)) {
+			/*
+			 * If there's no PCI host bridge with ISA, arbitrarily
+			 * designate one as primary.  This can go away once
+			 * various bugs with primary-less systems are fixed.
+			 */
+			if (!fsl_pci_primary)
+				fsl_pci_primary = node;
+
+			fsl_add_bridge(node, fsl_pci_primary == node);
+			hose = pci_find_hose_for_OF_device(node);
+			max = min(max, hose->dma_window_base_cur +
+					hose->dma_window_size);
+		}
+	}
+
+#ifdef CONFIG_SWIOTLB
+	/*
+	 * if we couldn't map all of DRAM via the dma windows
+	 * we need SWIOTLB to handle buffers located outside of
+	 * dma capable memory region
+	 */
+	if (memblock_end_of_DRAM() - 1 > max) {
+		ppc_swiotlb_enable = 1;
+		set_pci_dma_ops(&swiotlb_dma_ops);
+		ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
+	}
+#endif
+}
+#endif
diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h
index a39ed5c..baa0fd1 100644
--- a/arch/powerpc/sysdev/fsl_pci.h
+++ b/arch/powerpc/sysdev/fsl_pci.h
@@ -93,5 +93,13 @@ extern void fsl_pcibios_fixup_bus(struct pci_bus *bus);
 extern int mpc83xx_add_bridge(struct device_node *dev);
 u64 fsl_pci_immrbar_base(struct pci_controller *hose);
 
+extern struct device_node *fsl_pci_primary;
+
+#ifdef CONFIG_FSL_PCI
+void fsl_pci_init(void);
+#else
+static inline void fsl_pci_init(void) {}
+#endif
+
 #endif /* __POWERPC_FSL_PCI_H */
 #endif /* __KERNEL__ */
-- 
1.7.5.4

^ permalink raw reply related

* [PATCH 0/3] powerpc/fsl: PCI refactoring and QEMU paravirt platform
From: Scott Wood @ 2012-06-27 23:48 UTC (permalink / raw)
  To: galak; +Cc: linuxppc-dev, agraf, Jia Hongtao

The QEMU stuff is related to the PCI refactoring because currently
we have a hard time selecting a primary bus under QEMU, and also because
the generic qemu e500 platform wants a full list of FSL PCI compatibles
to check.

Scott Wood (3):
  powerpc/fsl-pci: get PCI init out of board files
  powerpc/e500: add paravirt QEMU platform
  powerpc/mpc85xx_ds: convert to unified PCI init

 arch/powerpc/platforms/85xx/Kconfig      |   16 +++++
 arch/powerpc/platforms/85xx/Makefile     |    1 +
 arch/powerpc/platforms/85xx/mpc85xx_ds.c |   97 +++++++++---------------------
 arch/powerpc/platforms/85xx/qemu_e500.c  |   66 ++++++++++++++++++++
 arch/powerpc/platforms/Kconfig.cputype   |    4 +
 arch/powerpc/sysdev/fsl_pci.c            |   66 ++++++++++++++++++++
 arch/powerpc/sysdev/fsl_pci.h            |    8 +++
 7 files changed, 190 insertions(+), 68 deletions(-)
 create mode 100644 arch/powerpc/platforms/85xx/qemu_e500.c

-- 
1.7.5.4

^ permalink raw reply

* Re: [PATCH 03/21] ppc/eeh: more logs for EEH initialization
From: Michael Ellerman @ 2012-06-27 23:45 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linuxppc-dev
In-Reply-To: <1340812911-6793-4-git-send-email-shangw@linux.vnet.ibm.com>

On Thu, 2012-06-28 at 00:01 +0800, Gavin Shan wrote:
> The patch adds more logs to EEH initialization functions for
> debugging purpose. Also, the machine type ("pSeries") is checked
> in the platform initialization to assure it's the correct platform
> to invoke it.

Hi Gavin,

Our boot logs are full enough. pr_info() is not right for this sort of
stuff.

For debug use:
      * pr_debug() - which can be enabled dynamically.
      * pr_devel() - which needs to be built with #define DEBUG
      * printk(KERN_DEBUG) - for things you always want printed, but
        needn't go to the console by default.

> diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
> index 8e3443b..a0cee3a 100644
> --- a/arch/powerpc/platforms/pseries/eeh_dev.c
> +++ b/arch/powerpc/platforms/pseries/eeh_dev.c
> @@ -100,6 +100,8 @@ static int __init eeh_dev_phb_init(void)
>  	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
>  		eeh_dev_phb_init_dynamic(phb);
>  
> +	pr_info("EEH: devices created\n");

That's not actually very informative.

> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
> index bcf0bb8..bb2bd90 100644
> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> @@ -561,7 +561,18 @@ static struct eeh_ops pseries_eeh_ops = {
>   */
>  static int __init eeh_pseries_init(void)
>  {
> -	return eeh_ops_register(&pseries_eeh_ops);
> +	int ret = -EINVAL;
> +
> +	if (!machine_is(pseries))
> +		return ret;
> +
> +	ret = eeh_ops_register(&pseries_eeh_ops);
> +	if (!ret)
> +		pr_info("EEH: pSeries platform initialized\n");
> +	else
> +		pr_info("EEH: pSeries platform initialization failure\n");
> +
> +	return ret;
>  }
>  
>  early_initcall(eeh_pseries_init);

You can achieve the same with initcall_debug.

But if you want to keep it at least print the return code, that's the
first thing you will want to know if it fails.

cheers

^ permalink raw reply

* [PATCH] powerpc: check_and_cede_processor never cedes
From: Anton Blanchard @ 2012-06-27 23:13 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, paulus
In-Reply-To: <1340837479.3732.90.camel@pasglop>


Hi,

> I'd rather add a helper, something like lazy_irq_pending()
> and hide the actual check for the bits in irq_happened, in
> case we change the scheme again.

Good idea. Look ok?

--

Commit f948501b36c6 ("Make hard_irq_disable() actually hard-disable
interrupts") caused check_and_cede_processor to stop working.
->irq_happened will never be zero right after a hard_irq_disable
so the compiler removes the call to cede_processor completely.

The bug was introduced back in the lazy interrupt handling rework
of 3.4 but was hidden until recently because hard_irq_disable did
nothing.

This issue will eventually appear in 3.4 stable since the
hard_irq_disable fix is marked stable, so mark this one for stable
too.

Signed-off-by: Anton Blanchard <anton@samba.org>  
Cc: stable@vger.kernel.org
---

v2: create a helper, suggested by Ben.

Index: linux-build/arch/powerpc/platforms/pseries/processor_idle.c
===================================================================
--- linux-build.orig/arch/powerpc/platforms/pseries/processor_idle.c	2012-06-28 08:55:09.422198154 +1000
+++ linux-build/arch/powerpc/platforms/pseries/processor_idle.c	2012-06-28 08:57:36.112591023 +1000
@@ -106,7 +106,7 @@ static void check_and_cede_processor(voi
 	 * we first hard disable then check.
 	 */
 	hard_irq_disable();
-	if (get_paca()->irq_happened == 0)
+	if (!lazy_irq_pending())
 		cede_processor();
 }
 
Index: linux-build/arch/powerpc/include/asm/hw_irq.h
===================================================================
--- linux-build.orig/arch/powerpc/include/asm/hw_irq.h	2012-06-21 09:16:26.265354429 +1000
+++ linux-build/arch/powerpc/include/asm/hw_irq.h	2012-06-28 08:59:22.082320381 +1000
@@ -103,6 +103,11 @@ static inline void hard_irq_disable(void
 /* include/linux/interrupt.h needs hard_irq_disable to be a macro */
 #define hard_irq_disable	hard_irq_disable
 
+static inline bool lazy_irq_pending(void)
+{
+	return !!(get_paca()->irq_happened & ~PACA_IRQ_HARD_DIS);
+}
+
 /*
  * This is called by asynchronous interrupts to conditionally
  * re-enable hard interrupts when soft-disabled after having

^ permalink raw reply

* Re: [PATCH] powerpc: check_and_cede_processor never cedes
From: Benjamin Herrenschmidt @ 2012-06-27 22:51 UTC (permalink / raw)
  To: Anton Blanchard; +Cc: linuxppc-dev, paulus
In-Reply-To: <20120628084509.4caec81d@kryten>

On Thu, 2012-06-28 at 08:45 +1000, Anton Blanchard wrote:
>  	hard_irq_disable();
> -	if (get_paca()->irq_happened == 0)
> +	if (get_paca()->irq_happened == PACA_IRQ_HARD_DIS)
>  		cede_processor();

I'd rather add a helper, something like lazy_irq_pending()
and hide the actual check for the bits in irq_happened, in
case we change the scheme again.

Something like:

static inline bool lazy_irq_pending(void)
{
	return !!(get_paca()->irq_happened & ~PACA_IRQ_HARD_DIS);
}

Cheers,
Ben.

^ permalink raw reply

* [PATCH] powerpc: check_and_cede_processor never cedes
From: Anton Blanchard @ 2012-06-27 22:45 UTC (permalink / raw)
  To: benh, paulus; +Cc: linuxppc-dev


Commit f948501b36c6 ("Make hard_irq_disable() actually hard-disable
interrupts") caused check_and_cede_processor to stop working.
->irq_happened will never be zero right after a hard_irq_disable
so the compiler removes the call to cede_processor completely.

The bug was introduced back in the lazy interrupt handling rework
of 3.4 but was hidden until recently because hard_irq_disable did
nothing.

This issue will eventually appear in 3.4 stable since the
hard_irq_disable fix is marked stable, so mark this one for stable
too.

Signed-off-by: Anton Blanchard <anton@samba.org>  
Cc: stable@vger.kernel.org
---

Index: linux-build/arch/powerpc/platforms/pseries/processor_idle.c
===================================================================
--- linux-build.orig/arch/powerpc/platforms/pseries/processor_idle.c	2012-06-27 21:20:45.403761715 +1000
+++ linux-build/arch/powerpc/platforms/pseries/processor_idle.c	2012-06-27 21:57:14.796788823 +1000
@@ -106,7 +106,7 @@ static void check_and_cede_processor(voi
 	 * we first hard disable then check.
 	 */
 	hard_irq_disable();
-	if (get_paca()->irq_happened == 0)
+	if (get_paca()->irq_happened == PACA_IRQ_HARD_DIS)
 		cede_processor();
 }
 

^ permalink raw reply

* Re: [PATCH V4 2/2] PCI: minimal alignment for bars of P2P bridges
From: Benjamin Herrenschmidt @ 2012-06-27 21:57 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: linux-pci, yinghai, Gavin Shan, linuxppc-dev
In-Reply-To: <CAErSpo64g0ZMJdW+YR-dH=dbcR0rApEf8sPzz7YWqb3v8O2V3Q@mail.gmail.com>

On Wed, 2012-06-27 at 12:48 -0600, Bjorn Helgaas wrote:
> On Wed, Jun 27, 2012 at 8:48 AM, Gavin Shan <shangw@linux.vnet.ibm.com> wrote:
> > On some powerpc platforms, device BARs need to be assigned to separate
> > "segments" of the address space in order for the error isolation and HW
> > virtualization mechanisms (EEH) to work properly. Those "segments" have
> > a minimum size that can be fairly large (16M). In order to be able to
> > use the generic resource assignment code rather than re-inventing our
> > own, we chose to group devices by bus. That way, a simple change of the
> > minimum alignment requirements of resources assigned to PCI to PCI (P2P)
> > bridges is enough to ensure that all BARs for devices below those bridges
> > will fit into contiguous sets of segments and there will be no overlap.
> 
> Is this something that is currently broken on powerpc?  I don't see
> any corresponding powerpc change, like a removal of whatever the
> previous way of doing this was.

Subsequent patch. The goal is to get rid of the bulk of the custom
resource allocation code in arch/powerpc/platforms/powernv/pci-ioda.c
where we basically re-implement it all (well I did) but without handling
all the special & corner cases that the generic code does.

The root of the problem is the need to segment the MMIO and IO spaces
based on a somewhat fixed (HW driven) segment size and have devices
isolated in their own groups of segments.

This is related to our "EEH" advanced error handling scheme which among
other things can properly connect errors triggered by MMIO (target
aborts, UE responses, etc...) to the actual device or driver.

> I'm not sure this is generic enough to warrant putting it in the core
> code (though I don't know whether we have any pcibios_*() hooks that
> would allow us to do it in the arch).

We don't have such hooks. This is the less invasive approach as far as I
can tell.

Cheers,
Ben.

> > This patch provides a way for the host bridge to override the default
> > alignment values used by the resource allocation code for that purpose.
> >
> > Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
> > Reviewed-by: Ram Pai <linuxram@us.ibm.com>
> > Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
> > ---
> >  drivers/pci/probe.c     |    5 +++++
> >  drivers/pci/setup-bus.c |   28 +++++++++++++++++++++-------
> >  include/linux/pci.h     |    8 ++++++++
> >  3 files changed, 34 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > index 658ac97..a196529 100644
> > --- a/drivers/pci/probe.c
> > +++ b/drivers/pci/probe.c
> > @@ -431,6 +431,11 @@ static struct pci_host_bridge *pci_alloc_host_bridge(struct pci_bus *b)
> >        if (bridge) {
> >                INIT_LIST_HEAD(&bridge->windows);
> >                bridge->bus = b;
> > +
> > +               /* Set minimal alignment shift of P2P bridges */
> > +               bridge->io_align_shift = PCI_DEFAULT_IO_ALIGN_SHIFT;
> > +               bridge->mem_align_shift = PCI_DEFAULT_MEM_ALIGN_SHIFT;
> > +               bridge->pmem_align_shift = PCI_DEFAULT_PMEM_ALIGN_SHIFT;
> >        }
> >
> >        return bridge;
> > diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> > index 8fa2d4b..caebe98 100644
> > --- a/drivers/pci/setup-bus.c
> > +++ b/drivers/pci/setup-bus.c
> > @@ -706,10 +706,12 @@ static resource_size_t calculate_memsize(resource_size_t size,
> >  static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
> >                resource_size_t add_size, struct list_head *realloc_head)
> >  {
> > +       struct pci_host_bridge *phb;
> >        struct pci_dev *dev;
> >        struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
> >        unsigned long size = 0, size0 = 0, size1 = 0;
> >        resource_size_t children_add_size = 0;
> > +       resource_size_t io_align;
> >
> >        if (!b_res)
> >                return;
> > @@ -735,13 +737,17 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
> >                                children_add_size += get_res_add_size(realloc_head, r);
> >                }
> >        }
> > +
> > +       phb = find_pci_host_bridge(bus);
> 
> I guess this explains why you want find_pci_host_bridge() to take a
> pci_bus, not a pci_dev..
> 
> > +       io_align = (1 << phb->io_align_shift);
> > +
> >        size0 = calculate_iosize(size, min_size, size1,
> > -                       resource_size(b_res), 4096);
> > +                       resource_size(b_res), io_align);
> >        if (children_add_size > add_size)
> >                add_size = children_add_size;
> >        size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
> >                calculate_iosize(size, min_size, add_size + size1,
> > -                       resource_size(b_res), 4096);
> > +                       resource_size(b_res), io_align);
> >        if (!size0 && !size1) {
> >                if (b_res->start || b_res->end)
> >                        dev_info(&bus->self->dev, "disabling bridge window "
> > @@ -751,11 +757,11 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
> >                return;
> >        }
> >        /* Alignment of the IO window is always 4K */
> > -       b_res->start = 4096;
> > +       b_res->start = io_align;
> 
> This looks like something that will collide with the changes in the
> pipe to support I/O windows smaller than 4K.
> 
> >        b_res->end = b_res->start + size0 - 1;
> >        b_res->flags |= IORESOURCE_STARTALIGN;
> >        if (size1 > size0 && realloc_head) {
> > -               add_to_list(realloc_head, bus->self, b_res, size1-size0, 4096);
> > +               add_to_list(realloc_head, bus->self, b_res, size1-size0, io_align);
> >                dev_printk(KERN_DEBUG, &bus->self->dev, "bridge window "
> >                                 "%pR to [bus %02x-%02x] add_size %lx\n", b_res,
> >                                 bus->secondary, bus->subordinate, size1-size0);
> > @@ -778,6 +784,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
> >                        resource_size_t add_size,
> >                        struct list_head *realloc_head)
> >  {
> > +       struct pci_host_bridge *phb;
> >        struct pci_dev *dev;
> >        resource_size_t min_align, align, size, size0, size1;
> >        resource_size_t aligns[12];     /* Alignments from 1Mb to 2Gb */
> > @@ -785,10 +792,17 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
> >        struct resource *b_res = find_free_bus_resource(bus, type);
> >        unsigned int mem64_mask = 0;
> >        resource_size_t children_add_size = 0;
> > +       int mem_align_shift;
> >
> >        if (!b_res)
> >                return 0;
> >
> > +       phb = find_pci_host_bridge(bus);
> > +       if (type & IORESOURCE_PREFETCH)
> > +               mem_align_shift = phb->pmem_align_shift;
> > +       else
> > +               mem_align_shift = phb->mem_align_shift;
> > +
> >        memset(aligns, 0, sizeof(aligns));
> >        max_order = 0;
> >        size = 0;
> > @@ -818,8 +832,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
> >  #endif
> >                        /* For bridges size != alignment */
> >                        align = pci_resource_alignment(dev, r);
> > -                       order = __ffs(align) - 20;
> > -                       if (order > 11) {
> > +                       order = __ffs(align) - mem_align_shift;
> > +                       if (order > (11 - (mem_align_shift - 20))) {
> >                                dev_warn(&dev->dev, "disabling BAR %d: %pR "
> >                                         "(bad alignment %#llx)\n", i, r,
> >                                         (unsigned long long) align);
> > @@ -846,7 +860,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask,
> >        for (order = 0; order <= max_order; order++) {
> >                resource_size_t align1 = 1;
> >
> > -               align1 <<= (order + 20);
> > +               align1 <<= (order + mem_align_shift);
> 
> This code must encode somewhere the assumption that mem windows must
> be at least 1MB aligned.  Maybe it has something to do with the "20"
> constants above.  Independent of your patch, it'd be nice to make this
> more explicit.
> 
> >
> >                if (!align)
> >                        min_align = align1;
> > diff --git a/include/linux/pci.h b/include/linux/pci.h
> > index 2b559f1..879de4e 100644
> > --- a/include/linux/pci.h
> > +++ b/include/linux/pci.h
> > @@ -376,9 +376,17 @@ struct pci_host_bridge_window {
> >        resource_size_t offset;         /* bus address + offset = CPU address */
> >  };
> >
> > +/* Default shits for P2P I/O and MMIO bar minimal alignment shifts */
> 
> "Default shifts"
> 
> > +#define PCI_DEFAULT_IO_ALIGN_SHIFT     12      /* 4KB  */
> > +#define PCI_DEFAULT_MEM_ALIGN_SHIFT    20      /* 1MB  */
> > +#define PCI_DEFAULT_PMEM_ALIGN_SHIFT   20      /* 1MB */
> > +
> >  struct pci_host_bridge {
> >        struct device dev;
> >        struct pci_bus *bus;            /* root bus */
> > +       int io_align_shift;             /* P2P I/O bar minimal alignment shift  */
> > +       int mem_align_shift;            /* P2P MMIO bar minimal alignment shift */
> > +       int pmem_align_shift;           /* P2P prefetchable MMIO bar minimal alignment shift */
> >        struct list_head windows;       /* pci_host_bridge_windows */
> >        void (*release_fn)(struct pci_host_bridge *);
> >        void *release_data;
> > --
> > 1.7.9.5
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] Using alloc_coherent for caam job rings
From: Kim Phillips @ 2012-06-27 19:34 UTC (permalink / raw)
  To: Bharat Bhushan; +Cc: Bharat Bhushan, linuxppc-dev, linux-crypto
In-Reply-To: <1340774912-19042-1-git-send-email-bharat.bhushan@freescale.com>

On Wed, 27 Jun 2012 10:58:32 +0530
Bharat Bhushan <r65777@freescale.com> wrote:

> This resolves the Linux boot crash issue when "swiotlb=force" is set
> in bootargs on systems which have memory more than 4G.

Acked-by: Kim Phillips <kim.phillips@freescale.com>

Thanks Bharat,

Kim

^ permalink raw reply

* Re: [PATCH V4 2/2] PCI: minimal alignment for bars of P2P bridges
From: Bjorn Helgaas @ 2012-06-27 18:48 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linux-pci, yinghai, linuxppc-dev
In-Reply-To: <1340808525-24996-2-git-send-email-shangw@linux.vnet.ibm.com>

On Wed, Jun 27, 2012 at 8:48 AM, Gavin Shan <shangw@linux.vnet.ibm.com> wrote:
> On some powerpc platforms, device BARs need to be assigned to separate
> "segments" of the address space in order for the error isolation and HW
> virtualization mechanisms (EEH) to work properly. Those "segments" have
> a minimum size that can be fairly large (16M). In order to be able to
> use the generic resource assignment code rather than re-inventing our
> own, we chose to group devices by bus. That way, a simple change of the
> minimum alignment requirements of resources assigned to PCI to PCI (P2P)
> bridges is enough to ensure that all BARs for devices below those bridges
> will fit into contiguous sets of segments and there will be no overlap.

Is this something that is currently broken on powerpc?  I don't see
any corresponding powerpc change, like a removal of whatever the
previous way of doing this was.

I'm not sure this is generic enough to warrant putting it in the core
code (though I don't know whether we have any pcibios_*() hooks that
would allow us to do it in the arch).

> This patch provides a way for the host bridge to override the default
> alignment values used by the resource allocation code for that purpose.
>
> Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
> Reviewed-by: Ram Pai <linuxram@us.ibm.com>
> Reviewed-by: Richard Yang <weiyang@linux.vnet.ibm.com>
> ---
>  drivers/pci/probe.c     |    5 +++++
>  drivers/pci/setup-bus.c |   28 +++++++++++++++++++++-------
>  include/linux/pci.h     |    8 ++++++++
>  3 files changed, 34 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index 658ac97..a196529 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -431,6 +431,11 @@ static struct pci_host_bridge *pci_alloc_host_bridge(struct pci_bus *b)
>        if (bridge) {
>                INIT_LIST_HEAD(&bridge->windows);
>                bridge->bus = b;
> +
> +               /* Set minimal alignment shift of P2P bridges */
> +               bridge->io_align_shift = PCI_DEFAULT_IO_ALIGN_SHIFT;
> +               bridge->mem_align_shift = PCI_DEFAULT_MEM_ALIGN_SHIFT;
> +               bridge->pmem_align_shift = PCI_DEFAULT_PMEM_ALIGN_SHIFT;
>        }
>
>        return bridge;
> diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
> index 8fa2d4b..caebe98 100644
> --- a/drivers/pci/setup-bus.c
> +++ b/drivers/pci/setup-bus.c
> @@ -706,10 +706,12 @@ static resource_size_t calculate_memsize(resource_size_t size,
>  static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
>                resource_size_t add_size, struct list_head *realloc_head)
>  {
> +       struct pci_host_bridge *phb;
>        struct pci_dev *dev;
>        struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO);
>        unsigned long size = 0, size0 = 0, size1 = 0;
>        resource_size_t children_add_size = 0;
> +       resource_size_t io_align;
>
>        if (!b_res)
>                return;
> @@ -735,13 +737,17 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
>                                children_add_size += get_res_add_size(realloc_head, r);
>                }
>        }
> +
> +       phb = find_pci_host_bridge(bus);

I guess this explains why you want find_pci_host_bridge() to take a
pci_bus, not a pci_dev..

> +       io_align = (1 << phb->io_align_shift);
> +
>        size0 = calculate_iosize(size, min_size, size1,
> -                       resource_size(b_res), 4096);
> +                       resource_size(b_res), io_align);
>        if (children_add_size > add_size)
>                add_size = children_add_size;
>        size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 :
>                calculate_iosize(size, min_size, add_size + size1,
> -                       resource_size(b_res), 4096);
> +                       resource_size(b_res), io_align);
>        if (!size0 && !size1) {
>                if (b_res->start || b_res->end)
>                        dev_info(&bus->self->dev, "disabling bridge window "
> @@ -751,11 +757,11 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size,
>                return;
>        }
>        /* Alignment of the IO window is always 4K */
> -       b_res->start = 4096;
> +       b_res->start = io_align;

This looks like something that will collide with the changes in the
pipe to support I/O windows smaller than 4K.

> =A0 =A0 =A0 =A0b_res->end =3D b_res->start + size0 - 1;
> =A0 =A0 =A0 =A0b_res->flags |=3D IORESOURCE_STARTALIGN;
> =A0 =A0 =A0 =A0if (size1 > size0 && realloc_head) {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 add_to_list(realloc_head, bus->self, b_res,=
 size1-size0, 4096);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 add_to_list(realloc_head, bus->self, b_res,=
 size1-size0, io_align);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dev_printk(KERN_DEBUG, &bus->self->dev, "b=
ridge window "
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "%pR to [=
bus %02x-%02x] add_size %lx\n", b_res,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 bus->seco=
ndary, bus->subordinate, size1-size0);
> @@ -778,6 +784,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigne=
d long mask,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0resource_size_t add_size,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct list_head *realloc_=
head)
> =A0{
> + =A0 =A0 =A0 struct pci_host_bridge *phb;
> =A0 =A0 =A0 =A0struct pci_dev *dev;
> =A0 =A0 =A0 =A0resource_size_t min_align, align, size, size0, size1;
> =A0 =A0 =A0 =A0resource_size_t aligns[12]; =A0 =A0 /* Alignments from 1Mb=
 to 2Gb */
> @@ -785,10 +792,17 @@ static int pbus_size_mem(struct pci_bus *bus, unsig=
ned long mask,
> =A0 =A0 =A0 =A0struct resource *b_res =3D find_free_bus_resource(bus, typ=
e);
> =A0 =A0 =A0 =A0unsigned int mem64_mask =3D 0;
> =A0 =A0 =A0 =A0resource_size_t children_add_size =3D 0;
> + =A0 =A0 =A0 int mem_align_shift;
>
> =A0 =A0 =A0 =A0if (!b_res)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return 0;
>
> + =A0 =A0 =A0 phb =3D find_pci_host_bridge(bus);
> + =A0 =A0 =A0 if (type & IORESOURCE_PREFETCH)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 mem_align_shift =3D phb->pmem_align_shift;
> + =A0 =A0 =A0 else
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 mem_align_shift =3D phb->mem_align_shift;
> +
> =A0 =A0 =A0 =A0memset(aligns, 0, sizeof(aligns));
> =A0 =A0 =A0 =A0max_order =3D 0;
> =A0 =A0 =A0 =A0size =3D 0;
> @@ -818,8 +832,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigne=
d long mask,
> =A0#endif
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* For bridges size !=3D a=
lignment */
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0align =3D pci_resource_ali=
gnment(dev, r);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 order =3D __ffs(align) - 20=
;
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (order > 11) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 order =3D __ffs(align) - me=
m_align_shift;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (order > (11 - (mem_alig=
n_shift - 20))) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dev_warn(&=
dev->dev, "disabling BAR %d: %pR "
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =
=A0 =A0 "(bad alignment %#llx)\n", i, r,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =
=A0 =A0 (unsigned long long) align);
> @@ -846,7 +860,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigne=
d long mask,
> =A0 =A0 =A0 =A0for (order =3D 0; order <=3D max_order; order++) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0resource_size_t align1 =3D 1;
>
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 align1 <<=3D (order + 20);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 align1 <<=3D (order + mem_align_shift);

This code must encode somewhere the assumption that mem windows must
be at least 1MB aligned.  Maybe it has something to do with the "20"
constants above.  Independent of your patch, it'd be nice to make this
more explicit.

>
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (!align)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0min_align =3D align1;
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index 2b559f1..879de4e 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -376,9 +376,17 @@ struct pci_host_bridge_window {
> =A0 =A0 =A0 =A0resource_size_t offset; =A0 =A0 =A0 =A0 /* bus address + o=
ffset =3D CPU address */
> =A0};
>
> +/* Default shits for P2P I/O and MMIO bar minimal alignment shifts */

"Default shifts"

> +#define PCI_DEFAULT_IO_ALIGN_SHIFT =A0 =A0 12 =A0 =A0 =A0/* 4KB =A0*/
> +#define PCI_DEFAULT_MEM_ALIGN_SHIFT =A0 =A020 =A0 =A0 =A0/* 1MB =A0*/
> +#define PCI_DEFAULT_PMEM_ALIGN_SHIFT =A0 20 =A0 =A0 =A0/* 1MB */
> +
> =A0struct pci_host_bridge {
> =A0 =A0 =A0 =A0struct device dev;
> =A0 =A0 =A0 =A0struct pci_bus *bus; =A0 =A0 =A0 =A0 =A0 =A0/* root bus */
> + =A0 =A0 =A0 int io_align_shift; =A0 =A0 =A0 =A0 =A0 =A0 /* P2P I/O bar =
minimal alignment shift =A0*/
> + =A0 =A0 =A0 int mem_align_shift; =A0 =A0 =A0 =A0 =A0 =A0/* P2P MMIO bar=
 minimal alignment shift */
> + =A0 =A0 =A0 int pmem_align_shift; =A0 =A0 =A0 =A0 =A0 /* P2P prefetchab=
le MMIO bar minimal alignment shift */
> =A0 =A0 =A0 =A0struct list_head windows; =A0 =A0 =A0 /* pci_host_bridge_w=
indows */
> =A0 =A0 =A0 =A0void (*release_fn)(struct pci_host_bridge *);
> =A0 =A0 =A0 =A0void *release_data;
> --
> 1.7.9.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH V4 1/2] PCI: pcibus address to resource converting take bus directly
From: Bjorn Helgaas @ 2012-06-27 18:15 UTC (permalink / raw)
  To: Gavin Shan; +Cc: linux-pci, yinghai, linuxppc-dev
In-Reply-To: <1340808525-24996-1-git-send-email-shangw@linux.vnet.ibm.com>

On Wed, Jun 27, 2012 at 8:48 AM, Gavin Shan <shangw@linux.vnet.ibm.com> wrote:
> For allocating resource under bus path, we do have dev pass along,
> and we could just use bus instead. Also, we'd like to make function
> find_pci_host_bridge() global so that some platforms (e.g. PPC) can
> access the pci host bridge directly.

This patch appears to have multiple unrelated changes:

  - change "struct pci_bus *bus" to "struct pci_bus *root_bus"
  - change find_pci_host_bridge() argument from dev to bus
  - fiddle with pcibios_bus_to_resource() and pcibios_resource_to_bus()

These should be split out to make your patches easier to review.

What's the rationale for preferring the pci_bus over the pci_dev?

> Signed-off-by: Yinghai Lu <yinghai@kernel.org>
> ---
> =A0drivers/pci/host-bridge.c | =A0 34 +++++++++++++++++++++-------------
> =A0include/linux/pci.h =A0 =A0 =A0 | =A0 =A04 ++++
> =A02 files changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
> index a68dc61..4ccf477 100644
> --- a/drivers/pci/host-bridge.c
> +++ b/drivers/pci/host-bridge.c
> @@ -9,22 +9,19 @@
>
> =A0#include "pci.h"
>
> -static struct pci_bus *find_pci_root_bus(struct pci_dev *dev)
> +static struct pci_bus *find_pci_root_bus(struct pci_bus *bus)
> =A0{
> - =A0 =A0 =A0 struct pci_bus *bus;
> -
> - =A0 =A0 =A0 bus =3D dev->bus;
> =A0 =A0 =A0 =A0while (bus->parent)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0bus =3D bus->parent;
>
> =A0 =A0 =A0 =A0return bus;
> =A0}
>
> -static struct pci_host_bridge *find_pci_host_bridge(struct pci_dev *dev)
> +struct pci_host_bridge *find_pci_host_bridge(struct pci_bus *bus)
> =A0{
> - =A0 =A0 =A0 struct pci_bus *bus =3D find_pci_root_bus(dev);
> + =A0 =A0 =A0 struct pci_bus *root_bus =3D find_pci_root_bus(bus);
>
> - =A0 =A0 =A0 return to_pci_host_bridge(bus->bridge);
> + =A0 =A0 =A0 return to_pci_host_bridge(root_bus->bridge);
> =A0}
>
> =A0void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
> @@ -40,10 +37,11 @@ static bool resource_contains(struct resource *res1, =
struct resource *res2)
> =A0 =A0 =A0 =A0return res1->start <=3D res2->start && res1->end >=3D res2=
->end;
> =A0}
>
> -void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region =
*region,
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct resource =
*res)
> +void __pcibios_resource_to_bus(struct pci_bus *bus,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
 struct pci_bus_region *region,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
 struct resource *res)
> =A0{
> - =A0 =A0 =A0 struct pci_host_bridge *bridge =3D find_pci_host_bridge(dev=
);
> + =A0 =A0 =A0 struct pci_host_bridge *bridge =3D find_pci_host_bridge(bus=
);
> =A0 =A0 =A0 =A0struct pci_host_bridge_window *window;
> =A0 =A0 =A0 =A0resource_size_t offset =3D 0;
>
> @@ -60,6 +58,11 @@ void pcibios_resource_to_bus(struct pci_dev *dev, stru=
ct pci_bus_region *region,
> =A0 =A0 =A0 =A0region->start =3D res->start - offset;
> =A0 =A0 =A0 =A0region->end =3D res->end - offset;
> =A0}
> +void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region =
*region,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct resource =
*res)
> +{
> + =A0 =A0 =A0 __pcibios_resource_to_bus(dev->bus, region, res);
> +}
> =A0EXPORT_SYMBOL(pcibios_resource_to_bus);
>
> =A0static bool region_contains(struct pci_bus_region *region1,
> @@ -68,10 +71,10 @@ static bool region_contains(struct pci_bus_region *re=
gion1,
> =A0 =A0 =A0 =A0return region1->start <=3D region2->start && region1->end =
>=3D region2->end;
> =A0}
>
> -void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct pci_bus_r=
egion *region)
> +static void __pcibios_bus_to_resource(struct pci_bus *bus, struct resour=
ce *res,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
 struct pci_bus_region *region)
> =A0{
> - =A0 =A0 =A0 struct pci_host_bridge *bridge =3D find_pci_host_bridge(dev=
);
> + =A0 =A0 =A0 struct pci_host_bridge *bridge =3D find_pci_host_bridge(bus=
);
> =A0 =A0 =A0 =A0struct pci_host_bridge_window *window;
> =A0 =A0 =A0 =A0resource_size_t offset =3D 0;
>
> @@ -93,4 +96,9 @@ void pcibios_bus_to_resource(struct pci_dev *dev, struc=
t resource *res,
> =A0 =A0 =A0 =A0res->start =3D region->start + offset;
> =A0 =A0 =A0 =A0res->end =3D region->end + offset;
> =A0}
> +void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct pci_bus_r=
egion *region)
> +{
> + =A0 =A0 =A0 __pcibios_bus_to_resource(dev->bus, res, region);
> +}
> =A0EXPORT_SYMBOL(pcibios_bus_to_resource);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index fefb4e1..2b559f1 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -385,6 +385,7 @@ struct pci_host_bridge {
> =A0};
>
> =A0#define =A0 =A0 =A0 =A0to_pci_host_bridge(n) container_of(n, struct pc=
i_host_bridge, dev)
> +struct pci_host_bridge *find_pci_host_bridge(struct pci_bus *bus);
> =A0void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 void (*release_fn)(struct pci_hos=
t_bridge *),
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 void *release_data);
> @@ -657,6 +658,9 @@ void pci_fixup_cardbus(struct pci_bus *);
>
> =A0/* Generic PCI functions used internally */
>
> +void __pcibios_resource_to_bus(struct pci_bus *bus,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct pci_b=
us_region *region,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct resou=
rce *res);
> =A0void pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_regio=
n *region,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct resource *=
res);
> =A0void pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res=
,
> --
> 1.7.9.5
>

^ permalink raw reply

* [PATCH 12/21] ppc/eeh: trace error based on PE from beginning
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

There are 2 conditions that trigger EEH error detection: an invalid
value returned from reading I/O space or from reading config space.
In each case, the function eeh_dn_check_failure() is called to
initialize an EEH event and queue it for further processing.

The patch changes the function slightly so that the EEH error is
traced based on the PE instead of the EEH device. Also, the function
eeh_find_device_pe() has been removed, since the EEH device now
tracks its PE through struct eeh_dev::pe.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/ppc-pci.h   |    1 -
 arch/powerpc/platforms/pseries/eeh.c |   51 +++++++++++++---------------------
 arch/powerpc/platforms/pseries/msi.c |    6 +++-
 3 files changed, 25 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index c7e5bd6..3e301b1 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -59,7 +59,6 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
 int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
 void eeh_pe_state_mark(struct eeh_pe *pe, int state);
 void eeh_pe_state_clear(struct eeh_pe *pe, int state);
-struct device_node *eeh_find_device_pe(struct device_node *dn);
 
 void eeh_sysfs_add_device(struct pci_dev *pdev);
 void eeh_sysfs_remove_device(struct pci_dev *pdev);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index c527c46..341ba1a 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -264,21 +264,6 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 }
 
 /**
- * eeh_find_device_pe - Retrieve the PE for the given device
- * @dn: device node
- *
- * Return the PE under which this device lies
- */
-struct device_node *eeh_find_device_pe(struct device_node *dn)
-{
-	while (dn->parent && of_node_to_eeh_dev(dn->parent) &&
-	       (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) {
-		dn = dn->parent;
-	}
-	return dn;
-}
-
-/**
  * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
  * @dn: device node
  * @dev: pci device, if known
@@ -297,6 +282,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 {
 	int ret;
 	unsigned long flags;
+	struct eeh_pe *pe;
 	struct eeh_dev *edev;
 	int rc = 0;
 	const char *location;
@@ -306,23 +292,26 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	if (!eeh_subsystem_enabled)
 		return 0;
 
-	if (!dn) {
+	if (dn) {
+		edev = of_node_to_eeh_dev(dn);
+	} else if (dev) {
+		edev = pci_dev_to_eeh_dev(dev);
+		dn = pci_device_to_OF_node(dev);
+	} else {
 		eeh_stats.no_dn++;
 		return 0;
 	}
-	dn = eeh_find_device_pe(dn);
-	edev = of_node_to_eeh_dev(dn);
+	pe = edev->pe;
 
 	/* Access to IO BARs might get this far and still not want checking. */
-	if (!(edev->mode & EEH_MODE_SUPPORTED) ||
-	    edev->mode & EEH_MODE_NOCHECK) {
+	if (!pe) {
 		eeh_stats.ignored_check++;
-		pr_debug("EEH: Ignored check (%x) for %s %s\n",
-			edev->mode, eeh_pci_name(dev), dn->full_name);
+		pr_debug("EEH: Ignored check for %s %s\n",
+			eeh_pci_name(dev), dn->full_name);
 		return 0;
 	}
 
-	if (!edev->config_addr && !edev->pe_config_addr) {
+	if (!pe->addr && !pe->config_addr) {
 		eeh_stats.no_cfg_addr++;
 		return 0;
 	}
@@ -335,13 +324,13 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	 */
 	raw_spin_lock_irqsave(&confirm_error_lock, flags);
 	rc = 1;
-	if (edev->mode & EEH_MODE_ISOLATED) {
-		edev->check_count++;
-		if (edev->check_count % EEH_MAX_FAILS == 0) {
+	if (pe->state & EEH_PE_ISOLATED) {
+		pe->check_count++;
+		if (pe->check_count % EEH_MAX_FAILS == 0) {
 			location = of_get_property(dn, "ibm,loc-code", NULL);
 			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
 				"location=%s driver=%s pci addr=%s\n",
-				edev->check_count, location,
+				pe->check_count, location,
 				eeh_driver_name(dev), eeh_pci_name(dev));
 			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
 				eeh_driver_name(dev));
@@ -357,7 +346,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	 * function zero of a multi-function device.
 	 * In any case they must share a common PHB.
 	 */
-	ret = eeh_ops->get_state(dn, NULL);
+	ret = eeh_ops->get_state(pe, NULL);
 
 	/* Note that config-io to empty slots may fail;
 	 * they are empty when they don't have children.
@@ -370,7 +359,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
 	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
 		eeh_stats.false_positives++;
-		edev->false_positives ++;
+		pe->false_positives++;
 		rc = 0;
 		goto dn_unlock;
 	}
@@ -381,10 +370,10 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	 * with other functions on this device, and functions under
 	 * bridges.
 	 */
-	eeh_mark_slot(dn, EEH_MODE_ISOLATED);
+	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 	raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
 
-	eeh_send_failure_event(edev);
+	eeh_send_failure_event(pe);
 
 	/* Most EEH events are due to device driver bugs.  Having
 	 * a stack trace will help the device-driver authors figure
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index 109fdb7..c8534fa 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -210,6 +210,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total)
 static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 {
 	struct device_node *dn;
+	struct eeh_dev *edev;
 
 	/* Found our PE and assume 8 at that point. */
 
@@ -217,7 +218,10 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total)
 	if (!dn)
 		return NULL;
 
-	dn = eeh_find_device_pe(dn);
+	/* Get the top level device in the PE */
+	edev = of_node_to_eeh_dev(dn);
+	edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list);
+	dn = eeh_dev_to_of_node(edev);
 	if (!dn)
 		return NULL;
 
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 18/21] ppc/eeh: handle EEH error based on PE
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch reworks the current implementation so that EEH errors
are handled based on the PE instead of the EEH device.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h              |    1 +
 arch/powerpc/include/asm/eeh_event.h        |    2 +-
 arch/powerpc/platforms/pseries/eeh_driver.c |  229 +++++++++++----------------
 arch/powerpc/platforms/pseries/eeh_event.c  |    2 +-
 arch/powerpc/platforms/pseries/eeh_pe.c     |   27 ++++
 5 files changed, 124 insertions(+), 137 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9a9fe28..e07ece1 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -174,6 +174,7 @@ int eeh_pe_remove(struct eeh_dev *edev);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
 		eeh_traverse_func fn, void *flag);
 void eeh_pe_restore_bars(struct eeh_pe *pe);
+struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
 void * __devinit eeh_dev_init(struct device_node *dn, void *data);
 void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb);
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index dc722b5..de67d83 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -32,7 +32,7 @@ struct eeh_event {
 };
 
 int eeh_send_failure_event(struct eeh_pe *pe);
-struct eeh_dev *handle_eeh_events(struct eeh_event *);
+void eeh_handle_event(struct eeh_pe *pe);
 
 #endif /* __KERNEL__ */
 #endif /* ASM_POWERPC_EEH_EVENT_H */
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index baf92cd..343c807 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -116,28 +116,35 @@ static void eeh_enable_irq(struct pci_dev *dev)
 
 /**
  * eeh_report_error - Report pci error to each device driver
- * @dev: PCI device
+ * @data: eeh device
  * @userdata: return value
  * 
  * Report an EEH error to each device driver, collect up and 
  * merge the device driver responses. Cumulative response 
  * passed back in "userdata".
  */
-static int eeh_report_error(struct pci_dev *dev, void *userdata)
+static void *eeh_report_error(void *data, void *userdata)
 {
+	struct eeh_dev *edev = (struct eeh_dev *)data;
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	enum pci_ers_result rc, *res = userdata;
 	struct pci_driver *driver = dev->driver;
 
+	/* We might not have the associated PCI device,
+	 * then we should continue for next one.
+	 */
+	if (!dev) return NULL;
+
 	dev->error_state = pci_channel_io_frozen;
 
 	if (!driver)
-		return 0;
+		return NULL;
 
 	eeh_disable_irq(dev);
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->error_detected)
-		return 0;
+		return NULL;
 
 	rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
 
@@ -145,27 +152,31 @@ static int eeh_report_error(struct pci_dev *dev, void *userdata)
 	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
-	return 0;
+	return NULL;
 }
 
 /**
  * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
- * @dev: PCI device
+ * @data: eeh device
  * @userdata: return value
  *
  * Tells each device driver that IO ports, MMIO and config space I/O
  * are now enabled. Collects up and merges the device driver responses.
  * Cumulative response passed back in "userdata".
  */
-static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
+static void *eeh_report_mmio_enabled(void *data, void *userdata)
 {
+	struct eeh_dev *edev = (struct eeh_dev *)data;
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	enum pci_ers_result rc, *res = userdata;
-	struct pci_driver *driver = dev->driver;
+	struct pci_driver *driver;
 
-	if (!driver ||
+	if (!dev) return NULL;
+
+	if (!(driver = dev->driver) ||
 	    !driver->err_handler ||
 	    !driver->err_handler->mmio_enabled)
-		return 0;
+		return NULL;
 
 	rc = driver->err_handler->mmio_enabled(dev);
 
@@ -173,12 +184,12 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
 	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
-	return 0;
+	return NULL;
 }
 
 /**
  * eeh_report_reset - Tell device that slot has been reset
- * @dev: PCI device
+ * @data: eeh device
  * @userdata: return value
  *
  * This routine must be called while EEH tries to reset particular
@@ -186,13 +197,15 @@ static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata)
  * some actions, usually to save data the driver needs so that the
  * driver can work again while the device is recovered.
  */
-static int eeh_report_reset(struct pci_dev *dev, void *userdata)
+static void *eeh_report_reset(void *data, void *userdata)
 {
+	struct eeh_dev *edev = (struct eeh_dev *)data;
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
 	enum pci_ers_result rc, *res = userdata;
-	struct pci_driver *driver = dev->driver;
+	struct pci_driver *driver;
 
-	if (!driver)
-		return 0;
+	if (!dev || !(driver = dev->driver))
+		return NULL;
 
 	dev->error_state = pci_channel_io_normal;
 
@@ -200,7 +213,7 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata)
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->slot_reset)
-		return 0;
+		return NULL;
 
 	rc = driver->err_handler->slot_reset(dev);
 	if ((*res == PCI_ERS_RESULT_NONE) ||
@@ -208,82 +221,89 @@ static int eeh_report_reset(struct pci_dev *dev, void *userdata)
 	if (*res == PCI_ERS_RESULT_DISCONNECT &&
 	     rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 
-	return 0;
+	return NULL;
 }
 
 /**
  * eeh_report_resume - Tell device to resume normal operations
- * @dev: PCI device
+ * @data: eeh device
  * @userdata: return value
  *
  * This routine must be called to notify the device driver that it
  * could resume so that the device driver can do some initialization
  * to make the recovered device work again.
  */
-static int eeh_report_resume(struct pci_dev *dev, void *userdata)
+static void *eeh_report_resume(void *data, void *userdata)
 {
-	struct pci_driver *driver = dev->driver;
+	struct eeh_dev *edev = (struct eeh_dev *)data;
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+	struct pci_driver *driver;
+
+	if (!dev) return NULL;
 
 	dev->error_state = pci_channel_io_normal;
 
-	if (!driver)
-		return 0;
+	if (!(driver = dev->driver))
+		return NULL;
 
 	eeh_enable_irq(dev);
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->resume)
-		return 0;
+		return NULL;
 
 	driver->err_handler->resume(dev);
 
-	return 0;
+	return NULL;
 }
 
 /**
  * eeh_report_failure - Tell device driver that device is dead.
- * @dev: PCI device
+ * @data: eeh device
  * @userdata: return value
  *
  * This informs the device driver that the device is permanently
  * dead, and that no further recovery attempts will be made on it.
  */
-static int eeh_report_failure(struct pci_dev *dev, void *userdata)
+static void *eeh_report_failure(void *data, void *userdata)
 {
-	struct pci_driver *driver = dev->driver;
+	struct eeh_dev *edev = (struct eeh_dev *)data;
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+	struct pci_driver *driver;
+
+	if (!dev) return NULL;
 
 	dev->error_state = pci_channel_io_perm_failure;
 
-	if (!driver)
-		return 0;
+	if (!(driver = dev->driver))
+		return NULL;
 
 	eeh_disable_irq(dev);
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->error_detected)
-		return 0;
+		return NULL;
 
 	driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
 
-	return 0;
+	return NULL;
 }
 
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
- * @edev: PE associated EEH device
+ * @pe: EEH PE
  * @bus: PCI bus corresponding to the isolcated slot
  *
  * This routine must be called to do reset on the indicated PE.
  * During the reset, udev might be invoked because those affected
  * PCI devices will be removed and then added.
  */
-static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 {
-	struct device_node *dn;
 	int cnt, rc;
 
 	/* pcibios will clear the counter; save the value */
-	cnt = edev->freeze_count;
+	cnt = pe->freeze_count;
 
 	if (bus)
 		pcibios_remove_pci_devices(bus);
@@ -292,25 +312,13 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
 	 * Reconfigure bridges and devices. Don't try to bring the system
 	 * up if the reset failed for some reason.
 	 */
-	rc = eeh_reset_pe(edev);
+	rc = eeh_reset_pe(pe);
 	if (rc)
 		return rc;
 
-	/* Walk over all functions on this device. */
-	dn = eeh_dev_to_of_node(edev);
-	if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
-		dn = dn->parent->child;
-
-	while (dn) {
-		struct eeh_dev *pedev = of_node_to_eeh_dev(dn);
-
-		/* On Power4, always true because eeh_pe_config_addr=0 */
-		if (edev->pe_config_addr == pedev->pe_config_addr) {
-			eeh_ops->configure_bridge(dn);
-			eeh_restore_bars(pedev);
- 		}
-		dn = dn->sibling;
-	}
+	/* Restore PE */
+	eeh_ops->configure_bridge(pe);
+	eeh_pe_restore_bars(pe);
 
 	/* Give the system 5 seconds to finish running the user-space
 	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes, 
@@ -322,7 +330,7 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
 		ssleep(5);
 		pcibios_add_pci_devices(bus);
 	}
-	edev->freeze_count = cnt;
+	pe->freeze_count = cnt;
 
 	return 0;
 }
@@ -334,7 +342,7 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
 
 /**
  * eeh_handle_event - Reset a PCI device after hard lockup.
- * @event: EEH event
+ * @pe: EEH PE
  *
  * While PHB detects address or data parity errors on particular PCI
  * slot, the associated PE will be frozen. Besides, DMA's occurring
@@ -349,69 +357,24 @@ static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
  * drivers (which cause a second set of hotplug events to go out to
  * userspace).
  */
-struct eeh_dev *handle_eeh_events(struct eeh_event *event)
+void eeh_handle_event(struct eeh_pe *pe)
 {
-	struct device_node *frozen_dn;
-	struct eeh_dev *frozen_edev;
 	struct pci_bus *frozen_bus;
 	int rc = 0;
 	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
-	const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
-
-	frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev));
-	if (!frozen_dn) {
-		location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL);
-		location = location ? location : "unknown";
-		printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
-		                "for location=%s pci addr=%s\n",
-			location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev)));
-		return NULL;
-	}
-
-	frozen_bus = pcibios_find_pci_bus(frozen_dn);
-	location = of_get_property(frozen_dn, "ibm,loc-code", NULL);
-	location = location ? location : "unknown";
-
-	/* There are two different styles for coming up with the PE.
-	 * In the old style, it was the highest EEH-capable device
-	 * which was always an EADS pci bridge.  In the new style,
-	 * there might not be any EADS bridges, and even when there are,
-	 * the firmware marks them as "EEH incapable". So another
-	 * two-step is needed to find the pci bus..
-	 */
-	if (!frozen_bus)
-		frozen_bus = pcibios_find_pci_bus(frozen_dn->parent);
 
+	frozen_bus = eeh_pe_bus_get(pe);
 	if (!frozen_bus) {
-		printk(KERN_ERR "EEH: Cannot find PCI bus "
-		        "for location=%s dn=%s\n",
-		        location, frozen_dn->full_name);
-		return NULL;
+		pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
+			__func__, pe->phb->global_number, pe->addr);
+		return;
 	}
 
-	frozen_edev = of_node_to_eeh_dev(frozen_dn);
-	frozen_edev->freeze_count++;
-	pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev));
-	drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev));
-
-	if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES)
+	pe->freeze_count++;
+	if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
 		goto excess_failures;
-
-	printk(KERN_WARNING
-	   "EEH: This PCI device has failed %d times in the last hour:\n",
-		frozen_edev->freeze_count);
-
-	if (frozen_edev->pdev) {
-		bus_pci_str = pci_name(frozen_edev->pdev);
-		bus_drv_str = eeh_pcid_name(frozen_edev->pdev);
-		printk(KERN_WARNING
-			"EEH: Bus location=%s driver=%s pci addr=%s\n",
-			location, bus_drv_str, bus_pci_str);
-	}
-
-	printk(KERN_WARNING
-		"EEH: Device location=%s driver=%s pci addr=%s\n",
-		location, drv_str, pci_str);
+	pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
+		pe->freeze_count);
 
 	/* Walk the various device drivers attached to this slot through
 	 * a reset sequence, giving each an opportunity to do what it needs
@@ -419,12 +382,12 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
 	 * status ... if any child can't handle the reset, then the entire
 	 * slot is dlpar removed and added.
 	 */
-	pci_walk_bus(frozen_bus, eeh_report_error, &result);
+	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
 
 	/* Get the current PCI slot state. This can take a long time,
 	 * sometimes over 3 seconds for certain systems.
 	 */
-	rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000);
+	rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
 	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
 		printk(KERN_WARNING "EEH: Permanent failure\n");
 		goto hard_fail;
@@ -434,14 +397,14 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
 	 * don't post the error log until after all dev drivers
 	 * have been informed.
 	 */
-	eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP);
+	eeh_slot_error_detail(pe, EEH_LOG_TEMP);
 
 	/* If all device drivers were EEH-unaware, then shut
 	 * down all of the device drivers, and hope they
 	 * go down willingly, without panicing the system.
 	 */
 	if (result == PCI_ERS_RESULT_NONE) {
-		rc = eeh_reset_device(frozen_edev, frozen_bus);
+		rc = eeh_reset_device(pe, frozen_bus);
 		if (rc) {
 			printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
 			goto hard_fail;
@@ -450,7 +413,7 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
 
 	/* If all devices reported they can proceed, then re-enable MMIO */
 	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
-		rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO);
+		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
 
 		if (rc < 0)
 			goto hard_fail;
@@ -458,13 +421,13 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
 			result = PCI_ERS_RESULT_NEED_RESET;
 		} else {
 			result = PCI_ERS_RESULT_NONE;
-			pci_walk_bus(frozen_bus, eeh_report_mmio_enabled, &result);
+			eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
 		}
 	}
 
 	/* If all devices reported they can proceed, then re-enable DMA */
 	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
-		rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA);
+		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
 
 		if (rc < 0)
 			goto hard_fail;
@@ -482,13 +445,13 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
 
 	/* If any device called out for a reset, then reset the slot */
 	if (result == PCI_ERS_RESULT_NEED_RESET) {
-		rc = eeh_reset_device(frozen_edev, NULL);
+		rc = eeh_reset_device(pe, NULL);
 		if (rc) {
 			printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
 			goto hard_fail;
 		}
 		result = PCI_ERS_RESULT_NONE;
-		pci_walk_bus(frozen_bus, eeh_report_reset, &result);
+		eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
 	}
 
 	/* All devices should claim they have recovered by now. */
@@ -499,9 +462,9 @@ struct eeh_dev *handle_eeh_events(struct eeh_event *event)
 	}
 
 	/* Tell all device drivers that they can resume operations */
-	pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
+	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
 
-	return frozen_edev;
+	return;
 	
 excess_failures:
 	/*
@@ -509,30 +472,26 @@ excess_failures:
 	 * are due to poorly seated PCI cards. Only 10% or so are
 	 * due to actual, failed cards.
 	 */
-	printk(KERN_ERR
-	   "EEH: PCI device at location=%s driver=%s pci addr=%s\n"
-		"has failed %d times in the last hour "
-		"and has been permanently disabled.\n"
-		"Please try reseating this device or replacing it.\n",
-		location, drv_str, pci_str, frozen_edev->freeze_count);
+	pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
+	       "last hour and has been permanently disabled.\n"
+	       "Please try reseating or replacing it.\n",
+		pe->phb->global_number, pe->addr,
+		pe->freeze_count);
 	goto perm_error;
 
 hard_fail:
-	printk(KERN_ERR
-	   "EEH: Unable to recover from failure of PCI device "
-	   "at location=%s driver=%s pci addr=%s\n"
-	   "Please try reseating this device or replacing it.\n",
-		location, drv_str, pci_str);
+	pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
+	       "Please try reseating or replacing it\n",
+		pe->phb->global_number, pe->addr);
 
 perm_error:
-	eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM);
+	eeh_slot_error_detail(pe, EEH_LOG_PERM);
 
 	/* Notify all devices that they're about to go down. */
-	pci_walk_bus(frozen_bus, eeh_report_failure, NULL);
+	eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
 
 	/* Shut down the device drivers for good. */
-	pcibios_remove_pci_devices(frozen_bus);
-
-	return NULL;
+	if (frozen_bus)
+		pcibios_remove_pci_devices(frozen_bus);
 }
 
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index 67c82c2..7d48feb 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -81,7 +81,7 @@ static int eeh_event_handler(void * dummy)
 	pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
 		pe->phb->global_number, pe->addr);
 
-	handle_eeh_events(event);
+	eeh_handle_event(pe);
 
 	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
index 8bae0f6..d70a7e4 100644
--- a/arch/powerpc/platforms/pseries/eeh_pe.c
+++ b/arch/powerpc/platforms/pseries/eeh_pe.c
@@ -554,3 +554,30 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
 	eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
 }
 
+/**
+ * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
+ * @pe: EEH PE
+ *
+ * Retrieve the PCI bus according to the given PE. Basically,
+ * there're 3 types of PEs: PHB/Bus/Device. For PHB PE, the
+ * primary PCI bus will be retrieved. The parent bus will be
+ * returned for BUS PE. However, we don't have associated PCI
+ * bus for DEVICE PE.
+ */
+struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
+{
+	struct pci_bus *bus = NULL;
+	struct eeh_dev *edev;
+	struct pci_dev *pdev;
+
+	if (pe->type == EEH_PE_PHB) {
+		bus = pe->phb->bus;
+	} else if (pe->type == EEH_PE_BUS) {
+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (pdev)
+			bus = pdev->bus;
+	}
+
+	return bus;
+}
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 13/21] ppc/eeh: eeh options based on PE
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

Originally, all the EEH options were implemented based on the OF node.
However, that breaks the rule that the target of an operation is a PE
rather than a device. Therefore, the patch makes all the operations
based on the PE instead of the device.

Unfortunately, the backend for config space has to be kept as it is,
because it doesn't actually depend on the PE.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h               |   14 +--
 arch/powerpc/platforms/pseries/eeh.c         |   13 ++-
 arch/powerpc/platforms/pseries/eeh_pseries.c |  133 +++++++++++---------------
 3 files changed, 74 insertions(+), 86 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 493dc7c..96451b7 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -136,13 +136,13 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
 struct eeh_ops {
 	char *name;
 	int (*init)(void);
-	int (*set_option)(struct device_node *dn, int option);
-	int (*get_pe_addr)(struct device_node *dn);
-	int (*get_state)(struct device_node *dn, int *state);
-	int (*reset)(struct device_node *dn, int option);
-	int (*wait_state)(struct device_node *dn, int max_wait);
-	int (*get_log)(struct device_node *dn, int severity, char *drv_log, unsigned long len);
-	int (*configure_bridge)(struct device_node *dn);
+	int (*set_option)(struct eeh_pe *pe, int option);
+	int (*get_pe_addr)(struct eeh_pe *pe);
+	int (*get_state)(struct eeh_pe *pe, int *state);
+	int (*reset)(struct eeh_pe *pe, int option);
+	int (*wait_state)(struct eeh_pe *pe, int max_wait);
+	int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
+	int (*configure_bridge)(struct eeh_pe *pe);
 	int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
 	int (*write_config)(struct device_node *dn, int where, int size, u32 val);
 };
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 341ba1a..636413f 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -729,6 +729,7 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
 	const u32 *regs;
 	int enable;
 	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+	struct eeh_pe pe;
 
 	edev->class_code = 0;
 	edev->mode = 0;
@@ -755,9 +756,14 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
 	 */
 	regs = of_get_property(dn, "reg", NULL);
 	if (regs) {
+		/* Initialize the fake PE */
+		memset(&pe, 0, sizeof(struct eeh_pe));
+		pe.phb = edev->phb;
+		pe.config_addr = regs[0];
+
 		/* First register entry is addr (00BBSS00)  */
 		/* Try to enable eeh */
-		ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE);
+		ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
 
 		enable = 0;
 		if (ret == 0) {
@@ -766,14 +772,15 @@ static void *eeh_early_enable(struct device_node *dn, void *data)
 			/* If the newer, better, ibm,get-config-addr-info is supported, 
 			 * then use that instead.
 			 */
-			edev->pe_config_addr = eeh_ops->get_pe_addr(dn);
+			edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
+			pe.addr = edev->pe_config_addr;
 
 			/* Some older systems (Power4) allow the
 			 * ibm,set-eeh-option call to succeed even on nodes
 			 * where EEH is not supported. Verify support
 			 * explicitly.
 			 */
-			ret = eeh_ops->get_state(dn, NULL);
+			ret = eeh_ops->get_state(&pe, NULL);
 			if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
 				enable = 1;
 		}
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index bb2bd90..6760e70 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -134,22 +134,18 @@ static int pseries_eeh_init(void)
 
 /**
  * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable
- * @dn: device node
+ * @pe: EEH PE
  * @option: operation to be issued
  *
  * The function is used to control the EEH functionality globally.
  * Currently, following options are support according to PAPR:
  * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
  */
-static int pseries_eeh_set_option(struct device_node *dn, int option)
+static int pseries_eeh_set_option(struct eeh_pe *pe, int option)
 {
 	int ret = 0;
-	struct eeh_dev *edev;
-	const u32 *reg;
 	int config_addr;
 
-	edev = of_node_to_eeh_dev(dn);
-
 	/*
 	 * When we're enabling or disabling EEH functioality on
 	 * the particular PE, the PE config address is possibly
@@ -159,15 +155,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
 	switch (option) {
 	case EEH_OPT_DISABLE:
 	case EEH_OPT_ENABLE:
-		reg = of_get_property(dn, "reg", NULL);
-		config_addr = reg[0];
-		break;
-
 	case EEH_OPT_THAW_MMIO:
 	case EEH_OPT_THAW_DMA:
-		config_addr = edev->config_addr;
-		if (edev->pe_config_addr)
-			config_addr = edev->pe_config_addr;
+		config_addr = pe->config_addr;
+		if (pe->addr)
+			config_addr = pe->addr;
 		break;
 
 	default:
@@ -177,15 +169,15 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
 	}
 
 	ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
-			config_addr, BUID_HI(edev->phb->buid),
-			BUID_LO(edev->phb->buid), option);
+			config_addr, BUID_HI(pe->phb->buid),
+			BUID_LO(pe->phb->buid), option);
 
 	return ret;
 }
 
 /**
  * pseries_eeh_get_pe_addr - Retrieve PE address
- * @dn: device node
+ * @pe: EEH PE
  *
  * Retrieve the assocated PE address. Actually, there're 2 RTAS
  * function calls dedicated for the purpose. We need implement
@@ -196,14 +188,11 @@ static int pseries_eeh_set_option(struct device_node *dn, int option)
  * It's notable that zero'ed return value means invalid PE config
  * address.
  */
-static int pseries_eeh_get_pe_addr(struct device_node *dn)
+static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
 {
-	struct eeh_dev *edev;
 	int ret = 0;
 	int rets[3];
 
-	edev = of_node_to_eeh_dev(dn);
-
 	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
 		/*
 		 * First of all, we need to make sure there has one PE
@@ -211,18 +200,18 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
 		 * meaningless.
 		 */
 		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
-				edev->config_addr, BUID_HI(edev->phb->buid),
-				BUID_LO(edev->phb->buid), 1);
+				pe->config_addr, BUID_HI(pe->phb->buid),
+				BUID_LO(pe->phb->buid), 1);
 		if (ret || (rets[0] == 0))
 			return 0;
 
 		/* Retrieve the associated PE config address */
 		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
-				edev->config_addr, BUID_HI(edev->phb->buid),
-				BUID_LO(edev->phb->buid), 0);
+				pe->config_addr, BUID_HI(pe->phb->buid),
+				BUID_LO(pe->phb->buid), 0);
 		if (ret) {
-			pr_warning("%s: Failed to get PE address for %s\n",
-				__func__, dn->full_name);
+			pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n",
+				__func__, pe->phb->global_number, pe->config_addr);
 			return 0;
 		}
 
@@ -231,11 +220,11 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
 
 	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
 		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
-				edev->config_addr, BUID_HI(edev->phb->buid),
-				BUID_LO(edev->phb->buid), 0);
+				pe->config_addr, BUID_HI(pe->phb->buid),
+				BUID_LO(pe->phb->buid), 0);
 		if (ret) {
-			pr_warning("%s: Failed to get PE address for %s\n",
-				__func__, dn->full_name);
+			pr_warning("%s: Failed to get address for PHB#%d-PE#%x\n",
+				__func__, pe->phb->global_number, pe->config_addr);
 			return 0;
 		}
 
@@ -247,7 +236,7 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
 
 /**
  * pseries_eeh_get_state - Retrieve PE state
- * @dn: PE associated device node
+ * @pe: EEH PE
  * @state: return value
  *
  * Retrieve the state of the specified PE. On RTAS compliant
@@ -258,30 +247,28 @@ static int pseries_eeh_get_pe_addr(struct device_node *dn)
  * RTAS calls for the purpose, we need to try the new one and back
  * to the old one if the new one couldn't work properly.
  */
-static int pseries_eeh_get_state(struct device_node *dn, int *state)
+static int pseries_eeh_get_state(struct eeh_pe *pe, int *state)
 {
-	struct eeh_dev *edev;
 	int config_addr;
 	int ret;
 	int rets[4];
 	int result;
 
 	/* Figure out PE config address if possible */
-	edev = of_node_to_eeh_dev(dn);
-	config_addr = edev->config_addr;
-	if (edev->pe_config_addr)
-		config_addr = edev->pe_config_addr;
+	config_addr = pe->config_addr;
+	if (pe->addr)
+		config_addr = pe->addr;
 
 	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
 		ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets,
-				config_addr, BUID_HI(edev->phb->buid),
-				BUID_LO(edev->phb->buid));
+				config_addr, BUID_HI(pe->phb->buid),
+				BUID_LO(pe->phb->buid));
 	} else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) {
 		/* Fake PE unavailable info */
 		rets[2] = 0;
 		ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets,
-				config_addr, BUID_HI(edev->phb->buid),
-				BUID_LO(edev->phb->buid));
+				config_addr, BUID_HI(pe->phb->buid),
+				BUID_LO(pe->phb->buid));
 	} else {
 		return EEH_STATE_NOT_SUPPORT;
 	}
@@ -333,34 +320,32 @@ static int pseries_eeh_get_state(struct device_node *dn, int *state)
 
 /**
  * pseries_eeh_reset - Reset the specified PE
- * @dn: PE associated device node
+ * @pe: EEH PE
  * @option: reset option
  *
  * Reset the specified PE
  */
-static int pseries_eeh_reset(struct device_node *dn, int option)
+static int pseries_eeh_reset(struct eeh_pe *pe, int option)
 {
-	struct eeh_dev *edev;
 	int config_addr;
 	int ret;
 
 	/* Figure out PE address */
-	edev = of_node_to_eeh_dev(dn);
-	config_addr = edev->config_addr;
-	if (edev->pe_config_addr)
-		config_addr = edev->pe_config_addr;
+	config_addr = pe->config_addr;
+	if (pe->addr)
+		config_addr = pe->addr;
 
 	/* Reset PE through RTAS call */
 	ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
-			config_addr, BUID_HI(edev->phb->buid),
-			BUID_LO(edev->phb->buid), option);
+			config_addr, BUID_HI(pe->phb->buid),
+			BUID_LO(pe->phb->buid), option);
 
 	/* If fundamental-reset not supported, try hot-reset */
 	if (option == EEH_RESET_FUNDAMENTAL &&
 	    ret == -8) {
 		ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
-				config_addr, BUID_HI(edev->phb->buid),
-				BUID_LO(edev->phb->buid), EEH_RESET_HOT);
+				config_addr, BUID_HI(pe->phb->buid),
+				BUID_LO(pe->phb->buid), EEH_RESET_HOT);
 	}
 
 	return ret;
@@ -368,13 +353,13 @@ static int pseries_eeh_reset(struct device_node *dn, int option)
 
 /**
  * pseries_eeh_wait_state - Wait for PE state
- * @dn: PE associated device node
+ * @pe: EEH PE
  * @max_wait: maximal period in microsecond
  *
  * Wait for the state of associated PE. It might take some time
  * to retrieve the PE's state.
  */
-static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
+static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait)
 {
 	int ret;
 	int mwait;
@@ -391,7 +376,7 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
 #define EEH_STATE_MAX_WAIT_TIME	(300 * 1000)
 
 	while (1) {
-		ret = pseries_eeh_get_state(dn, &mwait);
+		ret = pseries_eeh_get_state(pe, &mwait);
 
 		/*
 		 * If the PE's state is temporarily unavailable,
@@ -426,7 +411,7 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
 
 /**
  * pseries_eeh_get_log - Retrieve error log
- * @dn: device node
+ * @pe: EEH PE
  * @severity: temporary or permanent error log
  * @drv_log: driver log to be combined with retrieved error log
  * @len: length of driver log
@@ -435,24 +420,22 @@ static int pseries_eeh_wait_state(struct device_node *dn, int max_wait)
  * Actually, the error will be retrieved through the dedicated
  * RTAS call.
  */
-static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len)
+static int pseries_eeh_get_log(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len)
 {
-	struct eeh_dev *edev;
 	int config_addr;
 	unsigned long flags;
 	int ret;
 
-	edev = of_node_to_eeh_dev(dn);
 	spin_lock_irqsave(&slot_errbuf_lock, flags);
 	memset(slot_errbuf, 0, eeh_error_buf_size);
 
 	/* Figure out the PE address */
-	config_addr = edev->config_addr;
-	if (edev->pe_config_addr)
-		config_addr = edev->pe_config_addr;
+	config_addr = pe->config_addr;
+	if (pe->addr)
+		config_addr = pe->addr;
 
 	ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr,
-			BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid),
+			BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid),
 			virt_to_phys(drv_log), len,
 			virt_to_phys(slot_errbuf), eeh_error_buf_size,
 			severity);
@@ -465,40 +448,38 @@ static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_l
 
 /**
  * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
- * @dn: PE associated device node
+ * @pe: EEH PE
  *
  * The function will be called to reconfigure the bridges included
  * in the specified PE so that the mulfunctional PE would be recovered
  * again.
  */
-static int pseries_eeh_configure_bridge(struct device_node *dn)
+static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 {
-	struct eeh_dev *edev;
 	int config_addr;
 	int ret;
 
 	/* Figure out the PE address */
-	edev = of_node_to_eeh_dev(dn);
-	config_addr = edev->config_addr;
-	if (edev->pe_config_addr)
-		config_addr = edev->pe_config_addr;
+	config_addr = pe->config_addr;
+	if (pe->addr)
+		config_addr = pe->addr;
 
 	/* Use new configure-pe function, if supported */
 	if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
 		ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
-				config_addr, BUID_HI(edev->phb->buid),
-				BUID_LO(edev->phb->buid));
+				config_addr, BUID_HI(pe->phb->buid),
+				BUID_LO(pe->phb->buid));
 	} else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
 		ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
-				config_addr, BUID_HI(edev->phb->buid),
-				BUID_LO(edev->phb->buid));
+				config_addr, BUID_HI(pe->phb->buid),
+				BUID_LO(pe->phb->buid));
 	} else {
 		return -EFAULT;
 	}
 
 	if (ret)
-		pr_warning("%s: Unable to configure bridge %d for %s\n",
-			__func__, ret, dn->full_name);
+		pr_warning("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
+			__func__, pe->phb->global_number, pe->addr, ret);
 
 	return ret;
 }
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH V2 00/16] powerpc/eeh: PE support
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan

This series of patches addresses explicit PE support as well as probe type
support. For explicit PE support, struct eeh_pe has been introduced.
While designing the struct, the following factors have been taken into
account.

   * One particular PE might be composed of a single PCI device, or of
     multiple PCI devices and their derived child PCI devices (e.g.
     behind PCIe bridges). The PE struct includes a linked list to refer
     to the included PCI devices. Also, the linked list of devices reflects
     the top-to-bottom fashion of the PCI subtree. That is to say, the first
     device in the linked list should be the topmost element in the PCI
     subtree which is being managed by the PE.
   * PEs correlate to each other. So the existing PEs have to form hierarchy
     levels. Some fields in the PE struct (e.g. parent/child/sibling)
     have been introduced for that purpose.
   * For one PE, it's only meaningful in the PHB domain.

In addition, the mechanisms used to restore memory BARs and to report errors
have been reworked based on PE. The EEH cache has been reworked slightly,
based on Ben's suggestion, to trace EEH devices.

To support explicit probe types (either OF node or PCI device), a global
variable and some inline functions are introduced. The pSeries platform is
going to support OF node probe and figure out PEs from the corresponding OF
nodes. In contrast, the powernv platform has to use the PCI device probe type
since the PEs are constructed at PHB fixup time.

The series of patches has been verified on a Firebird-L machine using the
"errinjct" utility. Here's the command used for that.

errinjct eeh -v -f 0 -p U78AE.001.WZS00M9-P1-C18-L1-T2 -a 0x0 -m 0x0

V1 -> V2
	* Rebase to 3.5.RC4.
	* Use linked lists to trace the relationships among PEs, and between
	  PEs and eeh devices, according to Ram's suggestion.
	* Simplify the PE traverse function according to Ram's example.
	* Move EEH initialization around according to Ben's suggestion so
	  that we can do memory allocation through slab.
	* Use kzalloc() to allocate memory chunks for PE and eeh devices.
	* More booting messages for EEH initialization functions.
	* Introduce global EEH mutex to protect the PEs and eeh devices.
	* Added functions to support PE removal.
	* Comments cleanup
	* Change on the comparison of PE or BDF (Bus/Device/Function)
	  address so that code looks more readable.

arch/powerpc/include/asm/eeh.h               |  132 ++++--
arch/powerpc/include/asm/eeh_event.h         |    6 +-
arch/powerpc/include/asm/pci-bridge.h        |    2 +
arch/powerpc/include/asm/ppc-pci.h           |   15 +-
arch/powerpc/kernel/rtas_pci.c               |    5 +-
arch/powerpc/platforms/pseries/Makefile      |    5 +-
arch/powerpc/platforms/pseries/eeh.c         |  527 +++++------------------
arch/powerpc/platforms/pseries/eeh_cache.c   |   19 +-
arch/powerpc/platforms/pseries/eeh_dev.c     |   14 +-
arch/powerpc/platforms/pseries/eeh_driver.c  |  235 +++++------
arch/powerpc/platforms/pseries/eeh_event.c   |   53 +--
arch/powerpc/platforms/pseries/eeh_pe.c      |  583 ++++++++++++++++++++++++++
arch/powerpc/platforms/pseries/eeh_pseries.c |  246 +++++++----
arch/powerpc/platforms/pseries/eeh_sysfs.c   |    9 -
arch/powerpc/platforms/pseries/msi.c         |    6 +-
arch/powerpc/platforms/pseries/setup.c       |    2 -
16 files changed, 1119 insertions(+), 740 deletions(-)
create mode 100644 arch/powerpc/platforms/pseries/eeh_pe.c

Thanks,
Gavin

^ permalink raw reply

* [PATCH 17/21] ppc/eeh: make EEH handler PE sensitive
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

Once an EEH error is found, an EEH event is created and put into
the global linked list. Meanwhile, a kernel thread is started to
process it. The handler for the kernel thread was originally
EEH device sensitive.

The patch reworks the handler of the kernel thread so that it's PE
sensitive.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/eeh_event.c |   24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index ab8ca18..67c82c2 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -57,7 +57,7 @@ static int eeh_event_handler(void * dummy)
 {
 	unsigned long flags;
 	struct eeh_event *event;
-	struct eeh_dev *edev;
+	struct eeh_pe *pe;
 
 	set_task_comm(current, "eehd");
 
@@ -76,27 +76,23 @@ static int eeh_event_handler(void * dummy)
 
 	/* Serialize processing of EEH events */
 	mutex_lock(&eeh_event_mutex);
-	edev = event->edev;
-	eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
+	pe = event->pe;
+	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+	pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
+		pe->phb->global_number, pe->addr);
 
-	printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
-	       eeh_pci_name(edev->pdev));
+	handle_eeh_events(event);
 
-	set_current_state(TASK_INTERRUPTIBLE);	/* Don't add to load average */
-	edev = handle_eeh_events(event);
-
-	eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
-	pci_dev_put(edev->pdev);
+	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
 
 	kfree(event);
 	mutex_unlock(&eeh_event_mutex);
 
 	/* If there are no new errors after an hour, clear the counter. */
-	if (edev && edev->freeze_count>0) {
+	if (pe && pe->freeze_count > 0) {
 		msleep_interruptible(3600*1000);
-		if (edev->freeze_count>0)
-			edev->freeze_count--;
-
+		if (pe->freeze_count > 0)
+			pe->freeze_count--;
 	}
 
 	return 0;
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 05/21] ppc/eeh: introduce global mutex
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch introduces a global mutex for EEH so that the core data
structures can be protected by it. Also, 2 inline functions
are exported for that purpose: eeh_lock() and eeh_unlock().

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h       |   15 +++++++++++++++
 arch/powerpc/platforms/pseries/eeh.c |    3 +++
 2 files changed, 18 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index f77b6d7..248b3d9 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -146,6 +146,17 @@ struct eeh_ops {
 
 extern struct eeh_ops *eeh_ops;
 extern int eeh_subsystem_enabled;
+extern struct mutex eeh_mutex;
+
+static inline void eeh_lock(void)
+{
+	mutex_lock(&eeh_mutex);
+}
+
+static inline void eeh_unlock(void)
+{
+	mutex_unlock(&eeh_mutex);
+}
 
 /*
  * Max number of EEH freezes allowed before we consider the device
@@ -206,6 +217,10 @@ static inline void eeh_add_device_tree_early(struct device_node *dn) { }
 static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
 
 static inline void eeh_remove_bus_device(struct pci_dev *dev) { }
+
+static inline void eeh_lock(void) { }
+static inline void eeh_unlock(void) { }
+
 #define EEH_POSSIBLE_ERROR(val, type) (0)
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
 #endif /* CONFIG_EEH */
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index e819448..0ba7e3b 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -92,6 +92,9 @@ struct eeh_ops *eeh_ops = NULL;
 int eeh_subsystem_enabled;
 EXPORT_SYMBOL(eeh_subsystem_enabled);
 
+/* Global EEH mutex */
+DEFINE_MUTEX(eeh_mutex);
+
 /* Lock to avoid races due to multiple reports of an error */
 static DEFINE_RAW_SPINLOCK(confirm_error_lock);
 
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 06/21] ppc/eeh: Create PEs for PHBs
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

One particular PE is only meaningful in its ancestor PHB
domain. Therefore, each PHB should have its own PE hierarchy tree
to trace those PEs created against the PHB.

The patch creates PEs for the PHBs and puts those PEs into the
global linked list traced by "eeh_phb_pe". That linked list of PEs
would be the first level of the overall PE hierarchy tree across the
system.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h           |    2 +
 arch/powerpc/platforms/pseries/Makefile  |    5 +-
 arch/powerpc/platforms/pseries/eeh_dev.c |    4 ++
 arch/powerpc/platforms/pseries/eeh_pe.c  |  103 ++++++++++++++++++++++++++++++
 4 files changed, 112 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/platforms/pseries/eeh_pe.c

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 248b3d9..7b9c7d6 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -164,6 +164,8 @@ static inline void eeh_unlock(void)
  */
 #define EEH_MAX_ALLOWED_FREEZES 5
 
+int __devinit eeh_phb_pe_create(struct pci_controller *phb);
+
 void * __devinit eeh_dev_init(struct device_node *dn, void *data);
 void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb);
 int __init eeh_ops_register(struct eeh_ops *ops);
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index c222189..890622b 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,8 +6,9 @@ obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   firmware.o power.o dlpar.o mobility.o
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
-obj-$(CONFIG_EEH)	+= eeh.o eeh_dev.o eeh_cache.o eeh_driver.o \
-			   eeh_event.o eeh_sysfs.o eeh_pseries.o
+obj-$(CONFIG_EEH)	+= eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
+			   eeh_driver.o eeh_event.o eeh_sysfs.o \
+			   eeh_pseries.o
 obj-$(CONFIG_KEXEC)	+= kexec.o
 obj-$(CONFIG_PCI)	+= pci.o pci_dlpar.o
 obj-$(CONFIG_PSERIES_MSI)	+= msi.o
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
index a0cee3a..6644234 100644
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ b/arch/powerpc/platforms/pseries/eeh_dev.c
@@ -65,6 +65,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data)
 	PCI_DN(dn)->edev = edev;
 	edev->dn  = dn;
 	edev->phb = phb;
+	INIT_LIST_HEAD(&edev->list);
 
 	return NULL;
 }
@@ -80,6 +81,9 @@ void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb)
 {
 	struct device_node *dn = phb->dn;
 
+	/* EEH PE for PHB */
+	eeh_phb_pe_create(phb);
+
 	/* EEH device for PHB */
 	eeh_dev_init(dn, phb);
 
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
new file mode 100644
index 0000000..20d65dc
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/eeh_pe.c
@@ -0,0 +1,103 @@
+/*
+ * The file intends to implement PE based on the information from
+ * platforms. Basically, there are 3 types of PEs: PHB/Bus/Device.
+ * All the PEs should be organized as hierarchy tree. The first level
+ * of the tree will be associated to existing PHBs since the particular
+ * PE is only meaningful in one PHB domain.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+
+static LIST_HEAD(eeh_phb_pe);
+
+/**
+ * eeh_phb_pe_create - Create PHB PE 
+ * @phb: PCI controller
+ *
+ * The function should be called while the PHB is detected during
+ * system boot or PCI hotplug in order to create PHB PE.
+ */
+int __devinit eeh_phb_pe_create(struct pci_controller *phb)
+{
+	struct eeh_pe *pe;
+
+	/* Allocate PHB PE */
+	pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
+	if (!pe) {
+		pr_err("%s: out of memory!\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* Initialize PHB PE */
+	pe->type = EEH_PE_PHB;
+	pe->phb = phb;
+	INIT_LIST_HEAD(&pe->child_list);
+	INIT_LIST_HEAD(&pe->child);
+	INIT_LIST_HEAD(&pe->edevs);
+
+	/* Put it into the list */
+	eeh_lock();
+	list_add_tail(&pe->child, &eeh_phb_pe);
+	eeh_unlock();
+
+	pr_info("EEH: Add PE for PHB#%d\n", phb->global_number);
+
+	return 0;
+}
+
+/**
+ * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
+ * @phb: PCI controller
+ *
+ * The overall PEs form hierarchy tree. The first layer of the
+ * hierarchy tree is composed of PHB PEs. The function is used
+ * to retrieve the corresponding PHB PE according to the given PHB.
+ */
+static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
+{
+	struct eeh_pe *pe;
+
+	eeh_lock();
+
+	list_for_each_entry(pe, &eeh_phb_pe, child) {
+		/*
+		 * Actually, we needn't check the type since
+		 * the PE for PHB has been determined when that
+		 * was created.
+		 */
+		if (pe->type == EEH_PE_PHB &&
+		    pe->phb == phb) {
+			eeh_unlock();
+			return pe;
+		}
+	}
+
+	eeh_unlock();
+
+	return NULL;
+}
+
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 02/21] ppc/eeh: use slab to allocate eeh devices
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

The EEH initialization functions have been postponed until slab/slub
are ready. So we use slab/slub to allocate the memory chunks for
newly created EEH devices. That would save lots of memory.

The patch also does cleanup to replace "kmalloc" with "kzalloc" so
that we needn't clear the allocated memory chunk explicitly.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/eeh_cache.c |    2 +-
 arch/powerpc/platforms/pseries/eeh_dev.c   |    2 +-
 arch/powerpc/platforms/pseries/eeh_event.c |    2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index e5ae1c6..f50b717 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -151,7 +151,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
 			return piar;
 		}
 	}
-	piar = kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
+	piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
 	if (!piar)
 		return NULL;
 
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
index ab68c59..8e3443b 100644
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ b/arch/powerpc/platforms/pseries/eeh_dev.c
@@ -55,7 +55,7 @@ void * __devinit eeh_dev_init(struct device_node *dn, void *data)
 	struct eeh_dev *edev;
 
 	/* Allocate EEH device */
-	edev = zalloc_maybe_bootmem(sizeof(*edev), GFP_KERNEL);
+	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
 	if (!edev) {
 		pr_warning("%s: out of memory\n", __func__);
 		return NULL;
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index 4cb375c..87fea2e 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -137,7 +137,7 @@ int eeh_send_failure_event(struct eeh_dev *edev)
 		printk(KERN_ERR "EEH: PCI location = %s\n", location);
 		return 1;
 	}
-	event = kmalloc(sizeof(*event), GFP_ATOMIC);
+	event = kzalloc(sizeof(*event), GFP_ATOMIC);
 	if (event == NULL) {
 		printk(KERN_ERR "EEH: out of memory, event not handled\n");
 		return 1;
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 15/21] ppc/eeh: I/O enable and log retrival based on PE
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch refactors the original implementation in order to enable
I/O and do log retrieval based on PE.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/ppc-pci.h   |    4 ++--
 arch/powerpc/platforms/pseries/eeh.c |   44 +++++++++++++++-------------------
 2 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 5cbe3f2..5e34b10 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -51,8 +51,8 @@ void pci_addr_cache_build(void);
 void pci_addr_cache_insert_device(struct pci_dev *dev);
 void pci_addr_cache_remove_device(struct pci_dev *dev);
 struct pci_dev *pci_addr_cache_get_device(unsigned long addr);
-void eeh_slot_error_detail(struct eeh_dev *edev, int severity);
-int eeh_pci_enable(struct eeh_dev *edev, int function);
+void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
+int eeh_pci_enable(struct eeh_pe *pe, int function);
 int eeh_reset_pe(struct eeh_dev *);
 int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
 int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 28d0c04..031935d 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -207,22 +207,12 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
 		}
 	}
 
-	/* Gather status on devices under the bridge */
-	if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
-		struct device_node *child;
-
-		for_each_child_of_node(dn, child) {
-			if (of_node_to_eeh_dev(child))
-				n += eeh_gather_pci_data(of_node_to_eeh_dev(child), buf+n, len-n);
-		}
-	}
-
 	return n;
 }
 
 /**
  * eeh_slot_error_detail - Generate combined log including driver log and error log
- * @edev: device to report error log for
+ * @pe: EEH PE
  * @severity: temporary or permanent error log
  *
  * This routine should be called to generate the combined log, which
@@ -230,17 +220,22 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
  * out from the config space of the corresponding PCI device, while
  * the error log is fetched through platform dependent function call.
  */
-void eeh_slot_error_detail(struct eeh_dev *edev, int severity)
+void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 {
 	size_t loglen = 0;
-	pci_regs_buf[0] = 0;
+	struct eeh_dev *edev;
 
-	eeh_pci_enable(edev, EEH_OPT_THAW_MMIO);
-	eeh_ops->configure_bridge(eeh_dev_to_of_node(edev));
-	eeh_restore_bars(edev);
-	loglen = eeh_gather_pci_data(edev, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
+	eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+	eeh_ops->configure_bridge(pe);
+	eeh_pe_restore_bars(pe);
 
-	eeh_ops->get_log(eeh_dev_to_of_node(edev), severity, pci_regs_buf, loglen);
+	pci_regs_buf[0] = 0;
+	eeh_pe_for_each_dev(pe, edev) {
+		loglen += eeh_gather_pci_data(edev, pci_regs_buf,
+				EEH_PCI_REGS_LOG_LEN);
+        }
+
+	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
 }
 
 /**
@@ -427,23 +422,22 @@ EXPORT_SYMBOL(eeh_check_failure);
 
 /**
  * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
- * @edev: pci device node
+ * @pe: EEH PE
  *
  * This routine should be called to reenable frozen MMIO or DMA
  * so that it would work correctly again. It's useful while doing
  * recovery or log collection on the indicated device.
  */
-int eeh_pci_enable(struct eeh_dev *edev, int function)
+int eeh_pci_enable(struct eeh_pe *pe, int function)
 {
 	int rc;
-	struct device_node *dn = eeh_dev_to_of_node(edev);
 
-	rc = eeh_ops->set_option(dn, function);
+	rc = eeh_ops->set_option(pe, function);
 	if (rc)
-		printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
-		        function, rc, dn->full_name);
+		pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
+			__func__, function, pe->phb->global_number, pe->addr, rc);
 
-	rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC);
+	rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
 	if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
 	   (function == EEH_OPT_THAW_MMIO))
 		return 0;
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 21/21] ppc/eeh: trace eeh device from I/O cache
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

The idea comes from Benjamin Herrenschmidt. The eeh cache helps
fetch the pci device for a given I/O address. Since the eeh cache
serves eeh, it's reasonable for the eeh cache to track the eeh
device in addition to the pci device.

The patch makes the eeh cache track the eeh device. Also, the major
eeh entry function eeh_dn_check_failure has been renamed to
eeh_dev_check_failure since it now takes the eeh device as its
input parameter.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h             |    7 ++----
 arch/powerpc/include/asm/pci-bridge.h      |    2 ++
 arch/powerpc/include/asm/ppc-pci.h         |    2 +-
 arch/powerpc/kernel/rtas_pci.c             |    2 +-
 arch/powerpc/platforms/pseries/eeh.c       |   33 ++++++++++++----------------
 arch/powerpc/platforms/pseries/eeh_cache.c |   14 +++++++-----
 6 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 91c38b7..4d59da0 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -196,7 +196,7 @@ int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
 unsigned long eeh_check_failure(const volatile void __iomem *token,
 				unsigned long val);
-int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev);
+int eeh_dev_check_failure(struct eeh_dev *edev);
 void __init pci_addr_cache_build(void);
 void eeh_add_device_tree_early(struct device_node *);
 void eeh_add_device_tree_late(struct pci_bus *);
@@ -231,10 +231,7 @@ static inline unsigned long eeh_check_failure(const volatile void __iomem *token
 	return val;
 }
 
-static inline int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
-{
-	return 0;
-}
+#define eeh_dev_check_failure(x) (0)
 
 static inline void pci_addr_cache_build(void) { }
 
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index ac39e6a..f6bd6b2 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -183,6 +183,8 @@ static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
 {
 	return PCI_DN(dn)->edev;
 }
+#else
+#define of_node_to_eeh_dev(x) (NULL)
 #endif
 
 /** Find the bus corresponding to the indicated device node */
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 56d55c7..962a902 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -50,7 +50,7 @@ extern int rtas_setup_phb(struct pci_controller *phb);
 void pci_addr_cache_build(void);
 void pci_addr_cache_insert_device(struct pci_dev *dev);
 void pci_addr_cache_remove_device(struct pci_dev *dev);
-struct pci_dev *pci_addr_cache_get_device(unsigned long addr);
+struct eeh_dev *pci_addr_cache_get_device(unsigned long addr);
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
 int eeh_pci_enable(struct eeh_pe *pe, int function);
 int eeh_reset_pe(struct eeh_pe *);
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 140735c..6de63e3 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -81,7 +81,7 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	if (returnval == EEH_IO_ERROR_VALUE(size) &&
-	    eeh_dn_check_failure (pdn->node, NULL))
+	    eeh_dev_check_failure(of_node_to_eeh_dev(pdn->node)))
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	return PCIBIOS_SUCCESSFUL;
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index b2caf84..81e8c8e 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -270,9 +270,8 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
 }
 
 /**
- * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
- * @dn: device node
- * @dev: pci device, if known
+ * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
+ * @edev: eeh device
  *
  * Check for an EEH failure for the given device node.  Call this
  * routine if the result of a read was all 0xff's and you want to
@@ -284,12 +283,13 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
  *
  * It is safe to call this routine in an interrupt context.
  */
-int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
+int eeh_dev_check_failure(struct eeh_dev *edev)
 {
 	int ret;
 	unsigned long flags;
+	struct device_node *dn;
+	struct pci_dev *dev;
 	struct eeh_pe *pe;
-	struct eeh_dev *edev;
 	int rc = 0;
 	const char *location;
 
@@ -298,15 +298,12 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
 	if (!eeh_subsystem_enabled)
 		return 0;
 
-	if (dn) {
-		edev = of_node_to_eeh_dev(dn);
-	} else if (dev) {
-		edev = pci_dev_to_eeh_dev(dev);
-		dn = pci_device_to_OF_node(dev);
-	} else {
+	if (!edev) {
 		eeh_stats.no_dn++;
 		return 0;
 	}
+	dn = eeh_dev_to_of_node(edev);
+	dev = eeh_dev_to_pci_dev(edev);
 	pe = edev->pe;
 
 	/* Access to IO BARs might get this far and still not want checking. */
@@ -393,7 +390,7 @@ dn_unlock:
 	return rc;
 }
 
-EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
+EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
 
 /**
  * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
@@ -410,21 +407,19 @@ EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
 unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
 {
 	unsigned long addr;
-	struct pci_dev *dev;
-	struct device_node *dn;
+	struct eeh_dev *edev;
 
 	/* Finding the phys addr + pci device; this is pretty quick. */
 	addr = eeh_token_to_phys((unsigned long __force) token);
-	dev = pci_addr_cache_get_device(addr);
-	if (!dev) {
+	edev = pci_addr_cache_get_device(addr);
+	if (!edev) {
 		eeh_stats.no_device++;
 		return val;
 	}
 
-	dn = pci_device_to_OF_node(dev);
-	eeh_dn_check_failure(dn, dev);
+	eeh_dev_check_failure(edev);
 
-	pci_dev_put(dev);
+	pci_dev_put(eeh_dev_to_pci_dev(edev));
 	return val;
 }
 
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index a191057..6c5ef75 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -50,6 +50,7 @@ struct pci_io_addr_range {
 	struct rb_node rb_node;
 	unsigned long addr_lo;
 	unsigned long addr_hi;
+	struct eeh_dev *edev;
 	struct pci_dev *pcidev;
 	unsigned int flags;
 };
@@ -59,7 +60,7 @@ static struct pci_io_addr_cache {
 	spinlock_t piar_lock;
 } pci_io_addr_cache_root;
 
-static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
+static inline struct eeh_dev *__pci_addr_cache_get_device(unsigned long addr)
 {
 	struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
 
@@ -74,7 +75,7 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
 				n = n->rb_right;
 			} else {
 				pci_dev_get(piar->pcidev);
-				return piar->pcidev;
+				return piar->edev;
 			}
 		}
 	}
@@ -92,15 +93,15 @@ static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr)
  * from zero (that is, they do *not* have pci_io_addr added in).
  * It is safe to call this function within an interrupt.
  */
-struct pci_dev *pci_addr_cache_get_device(unsigned long addr)
+struct eeh_dev *pci_addr_cache_get_device(unsigned long addr)
 {
-	struct pci_dev *dev;
+	struct eeh_dev *edev;
 	unsigned long flags;
 
 	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
-	dev = __pci_addr_cache_get_device(addr);
+	edev = __pci_addr_cache_get_device(addr);
 	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
-	return dev;
+	return edev;
 }
 
 #ifdef DEBUG
@@ -158,6 +159,7 @@ pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
 	pci_dev_get(dev);
 	piar->addr_lo = alo;
 	piar->addr_hi = ahi;
+	piar->edev = pci_dev_to_eeh_dev(dev);
 	piar->pcidev = dev;
 	piar->flags = flags;
 
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 16/21] ppc/eeh: do reset based on PE
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch implements reset based on PE instead of eeh device. Also,
the functions used to retrieve the reset type, either hot or fundamental
reset, have been reworked a little. More specifically, they are now
implemented on top of the eeh device traverse function.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/ppc-pci.h   |    2 +-
 arch/powerpc/platforms/pseries/eeh.c |   91 +++++++++++++---------------------
 2 files changed, 35 insertions(+), 58 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index 5e34b10..2a80f08 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -53,7 +53,7 @@ void pci_addr_cache_remove_device(struct pci_dev *dev);
 struct pci_dev *pci_addr_cache_get_device(unsigned long addr);
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
 int eeh_pci_enable(struct eeh_pe *pe, int function);
-int eeh_reset_pe(struct eeh_dev *);
+int eeh_reset_pe(struct eeh_pe *);
 int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
 int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
 void eeh_pe_state_mark(struct eeh_pe *pe, int state);
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 031935d..d855c20 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -455,17 +455,24 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
  */
 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
 {
-	struct device_node *dn = pci_device_to_OF_node(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+	struct eeh_pe *pe = edev->pe;
+
+	if (!pe) {
+		pr_err("%s: No PE found on PCI device %s\n",
+			__func__, pci_name(dev));
+		return -EINVAL;
+	}
 
 	switch (state) {
 	case pcie_deassert_reset:
-		eeh_ops->reset(dn, EEH_RESET_DEACTIVATE);
+		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
 		break;
 	case pcie_hot_reset:
-		eeh_ops->reset(dn, EEH_RESET_HOT);
+		eeh_ops->reset(pe, EEH_RESET_HOT);
 		break;
 	case pcie_warm_reset:
-		eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL);
+		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
 		break;
 	default:
 		return -EINVAL;
@@ -475,66 +482,37 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 }
 
 /**
- * __eeh_set_pe_freset - Check the required reset for child devices
- * @parent: parent device
- * @freset: return value
- *
- * Each device might have its preferred reset type: fundamental or
- * hot reset. The routine is used to collect the information from
- * the child devices so that they could be reset accordingly.
- */
-void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
-{
-	struct device_node *dn;
-
-	for_each_child_of_node(parent, dn) {
-		if (of_node_to_eeh_dev(dn)) {
-			struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev;
-
-			if (dev && dev->driver)
-				*freset |= dev->needs_freset;
-
-			__eeh_set_pe_freset(dn, freset);
-		}
-	}
-}
-
-/**
- * eeh_set_pe_freset - Check the required reset for the indicated device and its children
- * @dn: parent device
- * @freset: return value
+ * eeh_set_pe_freset - Check the required reset for the indicated device
+ * @data: EEH device
+ * @flag: return value
  *
  * Each device might have its preferred reset type: fundamental or
  * hot reset. The routine is used to collected the information for
  * the indicated device and its children so that the bunch of the
  * devices could be reset properly.
  */
-void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
+static void *eeh_set_dev_freset(void *data, void *flag)
 {
 	struct pci_dev *dev;
-	dn = eeh_find_device_pe(dn);
-
-	/* Back up one, since config addrs might be shared */
-	if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
-		dn = dn->parent;
+	unsigned int *freset = (unsigned int *)flag;
+	struct eeh_dev *edev = (struct eeh_dev *)data;
 
-	dev = of_node_to_eeh_dev(dn)->pdev;
+	dev = eeh_dev_to_pci_dev(edev);
 	if (dev)
 		*freset |= dev->needs_freset;
 
-	__eeh_set_pe_freset(dn, freset);
+	return NULL;
 }
 
 /**
  * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
- * @edev: pci device node to be reset.
+ * @pe: EEH PE
  *
  * Assert the PCI #RST line for 1/4 second.
  */
-static void eeh_reset_pe_once(struct eeh_dev *edev)
+static void eeh_reset_pe_once(struct eeh_pe *pe)
 {
 	unsigned int freset = 0;
-	struct device_node *dn = eeh_dev_to_of_node(edev);
 
 	/* Determine type of EEH reset required for
 	 * Partitionable Endpoint, a hot-reset (1)
@@ -542,12 +520,12 @@ static void eeh_reset_pe_once(struct eeh_dev *edev)
 	 * A fundamental reset required by any device under
 	 * Partitionable Endpoint trumps hot-reset.
   	 */
-	eeh_set_pe_freset(dn, &freset);
+	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
 
 	if (freset)
-		eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL);
+		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
 	else
-		eeh_ops->reset(dn, EEH_RESET_HOT);
+		eeh_ops->reset(pe, EEH_RESET_HOT);
 
 	/* The PCI bus requires that the reset be held high for at least
 	 * a 100 milliseconds. We wait a bit longer 'just in case'.
@@ -559,9 +537,9 @@ static void eeh_reset_pe_once(struct eeh_dev *edev)
 	 * pci slot reset line is dropped. Make sure we don't miss
 	 * these, and clear the flag now.
 	 */
-	eeh_clear_slot(dn, EEH_MODE_ISOLATED);
+	eeh_pe_state_clear(pe, EEH_MODE_ISOLATED);
 
-	eeh_ops->reset(dn, EEH_RESET_DEACTIVATE);
+	eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
 
 	/* After a PCI slot has been reset, the PCI Express spec requires
 	 * a 1.5 second idle time for the bus to stabilize, before starting
@@ -573,32 +551,31 @@ static void eeh_reset_pe_once(struct eeh_dev *edev)
 
 /**
  * eeh_reset_pe - Reset the indicated PE
- * @edev: PCI device associated EEH device
+ * @pe: EEH PE
  *
  * This routine should be called to reset indicated device, including
  * PE. A PE might include multiple PCI devices and sometimes PCI bridges
  * might be involved as well.
  */
-int eeh_reset_pe(struct eeh_dev *edev)
+int eeh_reset_pe(struct eeh_pe *pe)
 {
 	int i, rc;
-	struct device_node *dn = eeh_dev_to_of_node(edev);
 
 	/* Take three shots at resetting the bus */
 	for (i=0; i<3; i++) {
-		eeh_reset_pe_once(edev);
+		eeh_reset_pe_once(pe);
 
-		rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC);
+		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
 		if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
 			return 0;
 
 		if (rc < 0) {
-			printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
-			       dn->full_name);
+			pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
+				__func__, pe->phb->global_number, pe->addr);
 			return -1;
 		}
-		printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
-		       i+1, dn->full_name, rc);
+		pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
+			i+1, pe->phb->global_number, pe->addr, rc);
 	}
 
 	return -1;
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 03/21] ppc/eeh: more logs for EEH initialization
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch adds more logs to EEH initialization functions for
debugging purpose. Also, the machine type ("pSeries") is checked
in the platform initialization to assure it's the correct platform
to invoke it.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/platforms/pseries/eeh_dev.c     |    2 ++
 arch/powerpc/platforms/pseries/eeh_pseries.c |   13 ++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
index 8e3443b..a0cee3a 100644
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ b/arch/powerpc/platforms/pseries/eeh_dev.c
@@ -100,6 +100,8 @@ static int __init eeh_dev_phb_init(void)
 	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
 		eeh_dev_phb_init_dynamic(phb);
 
+	pr_info("EEH: devices created\n");
+
 	return 0;
 }
 
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index bcf0bb8..bb2bd90 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -561,7 +561,18 @@ static struct eeh_ops pseries_eeh_ops = {
  */
 static int __init eeh_pseries_init(void)
 {
-	return eeh_ops_register(&pseries_eeh_ops);
+	int ret = -EINVAL;
+
+	if (!machine_is(pseries))
+		return ret;
+
+	ret = eeh_ops_register(&pseries_eeh_ops);
+	if (!ret)
+		pr_info("EEH: pSeries platform initialized\n");
+	else
+		pr_info("EEH: pSeries platform initialization failure\n");
+
+	return ret;
 }
 
 early_initcall(eeh_pseries_init);
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 07/21] ppc/eeh: Search PE based on requirement
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

The patch implements searching PE based on the following
requirements:

 * Search PE according to PE address, which is traditional
   PE address that is composed of PCI bus/device/function
   number, or unified PE address assigned by firmware or
   platform.
 * Search parent PE according to the given EEH device. It's
   useful when creating a new PE and putting it into the right position.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h          |    1 +
 arch/powerpc/platforms/pseries/eeh_pe.c |  146 +++++++++++++++++++++++++++++++
 2 files changed, 147 insertions(+)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 7b9c7d6..1cc1388 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -164,6 +164,7 @@ static inline void eeh_unlock(void)
  */
 #define EEH_MAX_ALLOWED_FREEZES 5
 
+typedef void *(*eeh_traverse_func)(void *data, void *flag);
 int __devinit eeh_phb_pe_create(struct pci_controller *phb);
 
 void * __devinit eeh_dev_init(struct device_node *dn, void *data);
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
index 20d65dc..f019953 100644
--- a/arch/powerpc/platforms/pseries/eeh_pe.c
+++ b/arch/powerpc/platforms/pseries/eeh_pe.c
@@ -101,3 +101,149 @@ static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
 	return NULL;
 }
 
+/**
+ * eeh_pe_next - Retrieve the next PE in the tree
+ * @pe: current PE
+ * @root: root PE
+ *
+ * The function is used to retrieve the next PE in the
+ * hierarchy PE tree.
+ */
+static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
+				  struct eeh_pe *root)
+{
+	struct list_head *next = pe->child_list.next;
+
+	if (next == &pe->child_list) {
+		while (1) {
+			if (pe == root)
+				return NULL;
+			next = pe->child.next;
+			if (next != &pe->parent->child_list)
+				break;
+			pe = pe->parent;
+		}
+	}
+
+	return list_entry(next, struct eeh_pe, child);
+}
+
+/**
+ * eeh_pe_traverse - Traverse PEs in the specified PHB
+ * @root: root PE
+ * @fn: callback
+ * @flag: extra parameter to callback
+ *
+ * The function is used to traverse the specified PE and its
+ * child PEs. The traversing is to be terminated once the
+ * callback returns something other than NULL, or no more PEs
+ * to be traversed.
+ */
+static void *eeh_pe_traverse(struct eeh_pe *root,
+			eeh_traverse_func fn, void *flag)
+{
+	struct eeh_pe *pe;
+	void *ret;
+
+	for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+		ret = fn(pe, flag);
+		if (ret) return ret;
+	}
+
+	return NULL;
+}
+
+/**
+ * __eeh_pe_get - Check the PE address
+ * @data: EEH PE
+ * @flag: EEH device
+ *
+ * For one particular PE, it can be identified by PE address
+ * or traditional BDF address. BDF address is composed of
+ * Bus/Device/Function number. The extra data referred by flag
+ * indicates which type of address should be used.
+ */
+static void *__eeh_pe_get(void *data, void *flag)
+{
+	struct eeh_pe *pe = (struct eeh_pe *)data;
+	struct eeh_dev *edev = (struct eeh_dev *)flag;
+
+	/* Unexpected PHB PE */
+	if (pe->type == EEH_PE_PHB)
+		return NULL;
+
+	/* We prefer PE address */
+	if (edev->pe_config_addr &&
+	    (edev->pe_config_addr == pe->addr))
+		return pe;
+
+	/* Try BDF address */
+	if (edev->pe_config_addr &&
+	    (edev->config_addr == pe->config_addr))
+		return pe;
+
+	return NULL;
+}
+
+/**
+ * eeh_pe_get - Search PE based on the given address
+ * @edev: EEH device
+ *
+ * Search the corresponding PE based on the specified address which
+ * is included in the eeh device. The function is used to check if
+ * the associated PE has been created against the PE address. It's
+ * notable that the PE address has 2 format: traditional PE address
+ * which is composed of PCI bus/device/function number, or unified
+ * PE address.
+ */
+static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+{
+	struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
+	struct eeh_pe *pe;
+
+	eeh_lock();
+	pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
+	eeh_unlock();
+
+	return pe;
+}
+
+/**
+ * eeh_pe_get_parent - Retrieve the parent PE
+ * @edev: EEH device
+ *
+ * The whole PEs existing in the system are organized as hierarchy
+ * tree. The function is used to retrieve the parent PE according
+ * to the parent EEH device.
+ */
+static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
+{
+	struct device_node *dn;
+	struct eeh_dev *parent;
+
+	/*
+	 * It might have the case for the indirect parent
+	 * EEH device already having associated PE, but
+	 * the direct parent EEH device doesn't have yet.
+	 */
+	dn = edev->dn->parent;
+	while (dn) {
+		/* We're poking out of PCI territory */
+		if (!PCI_DN(dn))
+			return NULL;
+
+		parent = of_node_to_eeh_dev(dn);
+
+		/* We're poking out of PCI territory */
+		if (!parent)
+			return NULL;
+
+		if (parent->pe)
+			return parent->pe;
+
+		dn = dn->parent;
+	}
+
+	return NULL;
+}
+
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 01/21] ppc/eeh: move EEH initialization around
From: Gavin Shan @ 2012-06-27 16:01 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Gavin Shan
In-Reply-To: <1340812911-6793-1-git-send-email-shangw@linux.vnet.ibm.com>

Currently, we have 3 phases for EEH initialization on the pSeries platform
using builtin functions: platform initialization, EEH device creation,
and EEH subsystem enablement. All of them are done no later than
ppc_md.setup_arch. That means that slab/slub isn't ready yet, so
we have to allocate memory chunks on the basis of PAGE_SIZE for those
dynamically created EEH devices. That's pretty expensive.

In order to utilize slab/slub for memory allocation, we have to move
the EEH initialization functions around so that all of them are
called after slab/slub is ready.

Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h               |   16 ----------------
 arch/powerpc/kernel/rtas_pci.c               |    3 ---
 arch/powerpc/platforms/pseries/eeh.c         |   10 +++++++---
 arch/powerpc/platforms/pseries/eeh_dev.c     |    6 +++++-
 arch/powerpc/platforms/pseries/eeh_pseries.c |    4 +++-
 arch/powerpc/platforms/pseries/setup.c       |    2 --
 6 files changed, 15 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index d60f998..06dedff 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -117,11 +117,6 @@ extern int eeh_subsystem_enabled;
 
 void * __devinit eeh_dev_init(struct device_node *dn, void *data);
 void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb);
-void __init eeh_dev_phb_init(void);
-void __init eeh_init(void);
-#ifdef CONFIG_PPC_PSERIES
-int __init eeh_pseries_init(void);
-#endif
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
 unsigned long eeh_check_failure(const volatile void __iomem *token,
@@ -156,17 +151,6 @@ static inline void *eeh_dev_init(struct device_node *dn, void *data)
 
 static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { }
 
-static inline void eeh_dev_phb_init(void) { }
-
-static inline void eeh_init(void) { }
-
-#ifdef CONFIG_PPC_PSERIES
-static inline int eeh_pseries_init(void)
-{
-	return 0;
-}
-#endif /* CONFIG_PPC_PSERIES */
-
 static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
 {
 	return val;
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index 179af90..140735c 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -275,9 +275,6 @@ void __init find_and_init_phbs(void)
 	of_node_put(root);
 	pci_devs_phb_init();
 
-	/* Create EEH devices for all PHBs */
-	eeh_dev_phb_init();
-
 	/*
 	 * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
 	 * in chosen.
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index ecd394c..e819448 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -982,7 +982,7 @@ int __exit eeh_ops_unregister(const char *name)
  * Even if force-off is set, the EEH hardware is still enabled, so that
  * newer systems can boot.
  */
-void __init eeh_init(void)
+static int __init eeh_init(void)
 {
 	struct pci_controller *hose, *tmp;
 	struct device_node *phb;
@@ -992,11 +992,11 @@ void __init eeh_init(void)
 	if (!eeh_ops) {
 		pr_warning("%s: Platform EEH operation not found\n",
 			__func__);
-		return;
+		return -EEXIST;
 	} else if ((ret = eeh_ops->init())) {
 		pr_warning("%s: Failed to call platform init function (%d)\n",
 			__func__, ret);
-		return;
+		return ret;
 	}
 
 	raw_spin_lock_init(&confirm_error_lock);
@@ -1011,8 +1011,12 @@ void __init eeh_init(void)
 		printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
 	else
 		printk(KERN_WARNING "EEH: No capable adapters found\n");
+
+	return ret;
 }
 
+core_initcall_sync(eeh_init);
+
 /**
  * eeh_add_device_early - Enable EEH for the indicated device_node
  * @dn: device node for which to set up EEH
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
index c4507d0..ab68c59 100644
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ b/arch/powerpc/platforms/pseries/eeh_dev.c
@@ -93,10 +93,14 @@ void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb)
  * Scan all the existing PHBs and create EEH devices for their OF
  * nodes and their children OF nodes
  */
-void __init eeh_dev_phb_init(void)
+static int __init eeh_dev_phb_init(void)
 {
 	struct pci_controller *phb, *tmp;
 
 	list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
 		eeh_dev_phb_init_dynamic(phb);
+
+	return 0;
 }
+
+core_initcall(eeh_dev_phb_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 8752f79..bcf0bb8 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -559,7 +559,9 @@ static struct eeh_ops pseries_eeh_ops = {
  * EEH initialization on pseries platform. This function should be
  * called before any EEH related functions.
  */
-int __init eeh_pseries_init(void)
+static int __init eeh_pseries_init(void)
 {
 	return eeh_ops_register(&pseries_eeh_ops);
 }
+
+early_initcall(eeh_pseries_init);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 51ecac9..5406473 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -388,10 +388,8 @@ static void __init pSeries_setup_arch(void)
 
 	/* Find and initialize PCI host bridges */
 	init_pci_config_tokens();
-	eeh_pseries_init();
 	find_and_init_phbs();
 	pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb);
-	eeh_init();
 
 	pSeries_nvram_init();
 
-- 
1.7.9.5

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox